Open-Meteo API data Analysis (Las Vegas, NV)¶
In [25]:
# Notes :
# Location to be analyzed, located at similar Latitude and geographics, but has very different weather (rain precipitation):
# a. Las Vegas (dry), coord: 36.1716° N, 115.1391° W
# b. Nashville (wet), coord: 36.1627° N, 86.7816° W
###############################################################################################
import openmeteo_requests
import requests_cache
from retry_requests import retry
import pandas as pd
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)
# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
"latitude": 36.175,
"longitude": -115.1372,
"hourly": ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation_probability", "precipitation", "rain", "showers", "pressure_msl", "surface_pressure", "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high", "visibility", "wind_speed_10m", "soil_moisture_0_to_1cm", "soil_moisture_1_to_3cm", "soil_moisture_3_to_9cm", "soil_moisture_9_to_27cm", "soil_moisture_27_to_81cm", "evapotranspiration", "snowfall", "snow_depth", "et0_fao_evapotranspiration", "vapour_pressure_deficit", "wind_speed_80m", "wind_speed_120m", "wind_speed_180m", "wind_direction_10m", "wind_direction_80m", "wind_direction_120m", "wind_direction_180m", "temperature_80m", "temperature_120m", "temperature_180m", "soil_temperature_0cm", "soil_temperature_6cm", "soil_temperature_18cm", "soil_temperature_54cm", "wind_gusts_10m"],
"past_days": 92,
"forecast_days": 1
}
responses = openmeteo.weather_api(url, params=params)
# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")
# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_dew_point_2m = hourly.Variables(2).ValuesAsNumpy()
hourly_apparent_temperature = hourly.Variables(3).ValuesAsNumpy()
hourly_precipitation_probability = hourly.Variables(4).ValuesAsNumpy()
hourly_precipitation = hourly.Variables(5).ValuesAsNumpy()
hourly_rain = hourly.Variables(6).ValuesAsNumpy()
hourly_showers = hourly.Variables(7).ValuesAsNumpy()
hourly_pressure_msl = hourly.Variables(8).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(9).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(10).ValuesAsNumpy()
hourly_cloud_cover_low = hourly.Variables(11).ValuesAsNumpy()
hourly_cloud_cover_mid = hourly.Variables(12).ValuesAsNumpy()
hourly_cloud_cover_high = hourly.Variables(13).ValuesAsNumpy()
hourly_visibility = hourly.Variables(14).ValuesAsNumpy()
hourly_wind_speed_10m = hourly.Variables(15).ValuesAsNumpy()
hourly_soil_moisture_0_to_1cm = hourly.Variables(16).ValuesAsNumpy()
hourly_soil_moisture_1_to_3cm = hourly.Variables(17).ValuesAsNumpy()
hourly_soil_moisture_3_to_9cm = hourly.Variables(18).ValuesAsNumpy()
hourly_soil_moisture_9_to_27cm = hourly.Variables(19).ValuesAsNumpy()
hourly_soil_moisture_27_to_81cm = hourly.Variables(20).ValuesAsNumpy()
hourly_evapotranspiration = hourly.Variables(21).ValuesAsNumpy()
hourly_snowfall = hourly.Variables(22).ValuesAsNumpy()
hourly_snow_depth = hourly.Variables(23).ValuesAsNumpy()
hourly_et0_fao_evapotranspiration = hourly.Variables(24).ValuesAsNumpy()
hourly_vapour_pressure_deficit = hourly.Variables(25).ValuesAsNumpy()
hourly_wind_speed_80m = hourly.Variables(26).ValuesAsNumpy()
hourly_wind_speed_120m = hourly.Variables(27).ValuesAsNumpy()
hourly_wind_speed_180m = hourly.Variables(28).ValuesAsNumpy()
hourly_wind_direction_10m = hourly.Variables(29).ValuesAsNumpy()
hourly_wind_direction_80m = hourly.Variables(30).ValuesAsNumpy()
hourly_wind_direction_120m = hourly.Variables(31).ValuesAsNumpy()
hourly_wind_direction_180m = hourly.Variables(32).ValuesAsNumpy()
hourly_temperature_80m = hourly.Variables(33).ValuesAsNumpy()
hourly_temperature_120m = hourly.Variables(34).ValuesAsNumpy()
hourly_temperature_180m = hourly.Variables(35).ValuesAsNumpy()
hourly_soil_temperature_0cm = hourly.Variables(36).ValuesAsNumpy()
hourly_soil_temperature_6cm = hourly.Variables(37).ValuesAsNumpy()
hourly_soil_temperature_18cm = hourly.Variables(38).ValuesAsNumpy()
hourly_soil_temperature_54cm = hourly.Variables(39).ValuesAsNumpy()
hourly_wind_gusts_10m = hourly.Variables(40).ValuesAsNumpy()
hourly_data = {"date": pd.date_range(
start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
freq = pd.Timedelta(seconds = hourly.Interval()),
inclusive = "left"
)}
hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["dew_point_2m"] = hourly_dew_point_2m
hourly_data["apparent_temperature"] = hourly_apparent_temperature
hourly_data["precipitation_probability"] = hourly_precipitation_probability
hourly_data["precipitation"] = hourly_precipitation
hourly_data["rain"] = hourly_rain
hourly_data["showers"] = hourly_showers
hourly_data["pressure_msl"] = hourly_pressure_msl
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["cloud_cover_low"] = hourly_cloud_cover_low
hourly_data["cloud_cover_mid"] = hourly_cloud_cover_mid
hourly_data["cloud_cover_high"] = hourly_cloud_cover_high
hourly_data["visibility"] = hourly_visibility
hourly_data["wind_speed_10m"] = hourly_wind_speed_10m
hourly_data["soil_moisture_0_to_1cm"] = hourly_soil_moisture_0_to_1cm
hourly_data["soil_moisture_1_to_3cm"] = hourly_soil_moisture_1_to_3cm
hourly_data["soil_moisture_3_to_9cm"] = hourly_soil_moisture_3_to_9cm
hourly_data["soil_moisture_9_to_27cm"] = hourly_soil_moisture_9_to_27cm
hourly_data["soil_moisture_27_to_81cm"] = hourly_soil_moisture_27_to_81cm
hourly_data["evapotranspiration"] = hourly_evapotranspiration
hourly_data["snowfall"] = hourly_snowfall
hourly_data["snow_depth"] = hourly_snow_depth
hourly_data["et0_fao_evapotranspiration"] = hourly_et0_fao_evapotranspiration
hourly_data["vapour_pressure_deficit"] = hourly_vapour_pressure_deficit
hourly_data["wind_speed_80m"] = hourly_wind_speed_80m
hourly_data["wind_speed_120m"] = hourly_wind_speed_120m
hourly_data["wind_speed_180m"] = hourly_wind_speed_180m
hourly_data["wind_direction_10m"] = hourly_wind_direction_10m
hourly_data["wind_direction_80m"] = hourly_wind_direction_80m
hourly_data["wind_direction_120m"] = hourly_wind_direction_120m
hourly_data["wind_direction_180m"] = hourly_wind_direction_180m
hourly_data["temperature_80m"] = hourly_temperature_80m
hourly_data["temperature_120m"] = hourly_temperature_120m
hourly_data["temperature_180m"] = hourly_temperature_180m
hourly_data["soil_temperature_0cm"] = hourly_soil_temperature_0cm
hourly_data["soil_temperature_6cm"] = hourly_soil_temperature_6cm
hourly_data["soil_temperature_18cm"] = hourly_soil_temperature_18cm
hourly_data["soil_temperature_54cm"] = hourly_soil_temperature_54cm
hourly_data["wind_gusts_10m"] = hourly_wind_gusts_10m
hourly_dataframe = pd.DataFrame(data = hourly_data)
print(hourly_dataframe)
Coordinates 36.16437911987305°N -115.1439208984375°E
Elevation 609.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s
date temperature_2m relative_humidity_2m \
0 2025-03-01 00:00:00+00:00 NaN NaN
1 2025-03-01 01:00:00+00:00 NaN NaN
2 2025-03-01 02:00:00+00:00 NaN NaN
3 2025-03-01 03:00:00+00:00 NaN NaN
4 2025-03-01 04:00:00+00:00 NaN NaN
... ... ... ...
2227 2025-06-01 19:00:00+00:00 39.169502 11.0
2228 2025-06-01 20:00:00+00:00 39.219501 11.0
2229 2025-06-01 21:00:00+00:00 39.269501 12.0
2230 2025-06-01 22:00:00+00:00 39.219501 12.0
2231 2025-06-01 23:00:00+00:00 38.669502 12.0
dew_point_2m apparent_temperature precipitation_probability \
0 NaN NaN 0.0
1 NaN NaN 0.0
2 NaN NaN 0.0
3 NaN NaN 0.0
4 NaN NaN 0.0
... ... ... ...
2227 3.341017 36.008011 1.0
2228 3.379128 35.056602 1.0
2229 4.657262 36.113987 2.0
2230 4.618772 35.940289 2.0
2231 4.195249 33.902687 3.0
precipitation rain showers pressure_msl surface_pressure \
0 NaN NaN NaN NaN NaN
1 NaN NaN NaN NaN NaN
2 NaN NaN NaN NaN NaN
3 NaN NaN NaN NaN NaN
4 NaN NaN NaN NaN NaN
... ... ... ... ... ...
2227 0.0 0.0 0.0 1005.900024 941.469604
2228 0.0 0.0 0.0 1005.200012 940.824158
2229 0.0 0.0 0.0 1004.700012 940.366394
2230 0.0 0.0 0.0 1003.700012 939.420227
2231 0.0 0.0 0.0 1003.599976 939.217957
cloud_cover cloud_cover_low cloud_cover_mid cloud_cover_high \
0 NaN NaN NaN NaN
1 NaN NaN NaN NaN
2 NaN NaN NaN NaN
3 NaN NaN NaN NaN
4 NaN NaN NaN NaN
... ... ... ... ...
2227 100.0 0.0 1.0 100.0
2228 100.0 0.0 0.0 100.0
2229 100.0 0.0 5.0 100.0
2230 55.0 0.0 0.0 55.0
2231 76.0 0.0 0.0 76.0
visibility wind_speed_10m soil_moisture_0_to_1cm \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 90000.0 23.950148 0.032
2228 90000.0 29.686359 0.032
2229 90000.0 25.499081 0.031
2230 90000.0 24.485292 0.031
2231 90000.0 29.810522 0.031
soil_moisture_1_to_3cm soil_moisture_3_to_9cm soil_moisture_9_to_27cm \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 0.076 0.105 0.103
2228 0.076 0.105 0.103
2229 0.076 0.105 0.102
2230 0.076 0.105 0.102
2231 0.076 0.105 0.102
soil_moisture_27_to_81cm evapotranspiration snowfall snow_depth \
0 NaN 0.0 NaN NaN
1 NaN 0.0 NaN NaN
2 NaN 0.0 NaN NaN
3 NaN 0.0 NaN NaN
4 NaN 0.0 NaN NaN
... ... ... ... ...
2227 0.11 -0.0 0.0 0.0
2228 0.11 -0.0 0.0 0.0
2229 0.11 -0.0 0.0 0.0
2230 0.11 -0.0 0.0 0.0
2231 0.11 -0.0 0.0 0.0
et0_fao_evapotranspiration vapour_pressure_deficit wind_speed_80m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 0.989649 6.279035 26.729847
2228 1.043472 6.295875 33.092594
2229 1.006306 6.241590 28.585449
2230 0.951399 6.224906 27.267473
2231 0.912528 6.043872 35.188046
wind_speed_120m wind_speed_180m wind_direction_10m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 42.661774 31.433586 222.563309
2228 36.379044 31.926540 194.036270
2229 46.478695 30.993471 210.547501
2230 49.953789 34.131718 221.423569
2231 48.185936 33.985218 227.447083
wind_direction_80m wind_direction_120m wind_direction_180m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 224.454407 169.077240 159.904678
2228 194.489807 179.421295 187.124924
2229 211.087433 235.304779 210.735413
2230 222.324509 222.018463 217.716049
2231 228.317871 182.622345 216.384445
temperature_80m temperature_120m temperature_180m \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 34.609997 34.359997 32.563999
2228 35.059998 34.809998 32.963997
2229 34.809998 34.559998 32.863998
2230 34.059998 33.809998 32.764000
2231 32.859997 32.609997 32.764000
soil_temperature_0cm soil_temperature_6cm soil_temperature_18cm \
0 NaN NaN NaN
1 NaN NaN NaN
2 NaN NaN NaN
3 NaN NaN NaN
4 NaN NaN NaN
... ... ... ...
2227 45.113998 40.563999 33.863998
2228 44.113998 40.563999 34.663998
2229 44.363998 40.813999 35.363998
2230 42.963997 40.813999 35.913998
2231 42.413998 40.363998 36.313999
soil_temperature_54cm wind_gusts_10m
0 NaN NaN
1 NaN NaN
2 NaN NaN
3 NaN NaN
4 NaN NaN
... ... ...
2227 30.714001 27.359999
2228 30.764000 33.480000
2229 30.764000 29.160000
2230 30.814001 28.080000
2231 30.864000 36.360001
[2232 rows x 42 columns]
In [ ]:
In [26]:
# Store the data into the CSV file
path1 = r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\Global_Weather\openmeteo_data_lasvegas_nv.csv'
hourly_dataframe.to_csv(path1, header='column_names')
In [3]:
# Visualize geolocation by Latitude and Longitude of Las Vegas and Nashville
import pandas as pd
import geopandas as gpd
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20,10)
# Load a world map shapefile (built-in dataset)
world = gpd.read_file(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\110m_cultural\ne_110m_admin_0_countries.shp')
worldpop = gpd.read_file(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\110m_cultural\ne_110m_populated_places.shp')
# a. Las Vegas (dry), coord: 36.1716° N, 115.1391° W
# b. Nashville (wet), coord: 36.1627° N, 86.7816° W
data = {'longitude': [-115.1391, -86.7816],
'latitude': [36.1716, 36.1627],
'value': [10, 20]}
df = pd.DataFrame(data)
# Create geometry column
geometry = gpd.points_from_xy(df['longitude'], df['latitude'], crs="EPSG:4326")
# Create GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry=geometry)
print(gdf)
usapop = worldpop[worldpop['ADM0NAME'] == 'United States of America']
# # Plotting multiple layers
fig, ax = plt.subplots()
plt.title('Geolocation of Las Vegas and Nashville')
world.plot(ax = ax, cmap = 'hsv', edgecolor = 'black', column = 'SOVEREIGNT')
gdf.plot(ax = ax, color = 'yellow', markersize = 40) # The location of Las Vegas and Nashville can be seen as yellow dots in the map
longitude latitude value geometry 0 -115.1391 36.1716 10 POINT (-115.1391 36.1716) 1 -86.7816 36.1627 20 POINT (-86.7816 36.1627)
Out[3]:
<Axes: title={'center': 'Geolocation of Las Vegas and Nashville'}>
In [4]:
# Visualize Las Vegas (and Nevada) and Nashville (and Tennessee)
# States are depicted with blue color
# Cities are depicted with yellow color
stateprov = gpd.read_file(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\110m_cultural\ne_110m_admin_1_states_provinces.shp')
usamain = stateprov[stateprov["name"] != "Alaska"]
usamain = usamain[usamain["name"] != "Hawaii"]
nevada = stateprov[stateprov["name"] == "Nevada"]
tennessee = stateprov[stateprov["name"] == "Tennessee"]
nevada['coords'] = nevada['geometry'].apply(lambda x: x.representative_point().coords[:])
nevada['coords'] = [coords[0] for coords in nevada['coords']]
tennessee['coords'] = tennessee['geometry'].apply(lambda x: x.representative_point().coords[:])
tennessee['coords'] = [coords[0] for coords in tennessee['coords']]
# # Plotting multiple layers
fig, ax = plt.subplots()
for idx, row in nevada.iterrows():
plt.annotate(text=row['name'], xy=row['coords'],
horizontalalignment='center')
for idx, row in tennessee.iterrows():
plt.annotate(text=row['name'], xy=row['coords'],
horizontalalignment='center')
usamain.plot(ax = ax, color='green', edgecolor = 'black')
nevada.plot(ax = ax, color = 'blue', markersize = 40)
tennessee.plot(ax = ax, color = 'blue', markersize = 40)
gdf.plot(ax = ax, color = 'yellow', markersize = 40)
C:\Users\Paulus Hendarwan\AppData\Roaming\Python\Python312\site-packages\geopandas\geodataframe.py:1981: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value) C:\Users\Paulus Hendarwan\AppData\Roaming\Python\Python312\site-packages\geopandas\geodataframe.py:1981: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame. Try using .loc[row_indexer,col_indexer] = value instead See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy super().__setitem__(key, value)
Out[4]:
<Axes: >
In [1]:
# Maps of Las Vegas
import pandas as pd
import geopandas as gpd
gdf = gpd.read_file(r"C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\cities.geojson")
lasvegasdf = gdf.loc[gdf['NAME'].isin(['LAS VEGAS'])]
# Remove all unwanted Las Vegases, just leave the one with Index 19199, Las Vegas in Nevada state.
# We use drop Method coz this Method doesn't change the GeoDataframe into Series.
# If we use .iloc or .loc Method, it will change the GeoDataframe into Series.
lasvegasdf = lasvegasdf.drop(19212)
lasvegasdf = lasvegasdf.drop(24676)
lasvegasdf.explore()
Out[1]:
Make this Notebook Trusted to load map: File -> Trust Notebook
In [ ]:
In [1]:
# Read from the CSV file
import pandas as pd
import matplotlib.pyplot as plt
path1 = r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\Global_Weather\openmeteo_data_lasvegas_nv.csv'
df1 = pd.read_csv(path1)
df1
Out[1]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | ... | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 0 | 2025-03-01 00:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 1 | 1 | 2025-03-01 01:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 2 | 2 | 2025-03-01 02:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 3 | 3 | 2025-03-01 03:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| 4 | 4 | 2025-03-01 04:00:00+00:00 | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 2227 | 2025-06-01 19:00:00+00:00 | 39.169502 | 11.0 | 3.341017 | 36.008010 | 1.0 | 0.0 | 0.0 | 0.0 | ... | 169.07724 | 159.90468 | 34.609997 | 34.359997 | 32.564000 | 45.114000 | 40.564 | 33.864000 | 30.714000 | 27.359999 |
| 2228 | 2228 | 2025-06-01 20:00:00+00:00 | 39.219500 | 11.0 | 3.379128 | 35.056602 | 1.0 | 0.0 | 0.0 | 0.0 | ... | 179.42130 | 187.12492 | 35.059998 | 34.809998 | 32.963997 | 44.114000 | 40.564 | 34.663998 | 30.764000 | 33.480000 |
| 2229 | 2229 | 2025-06-01 21:00:00+00:00 | 39.269500 | 12.0 | 4.657262 | 36.113987 | 2.0 | 0.0 | 0.0 | 0.0 | ... | 235.30478 | 210.73541 | 34.809998 | 34.559998 | 32.864000 | 44.364000 | 40.814 | 35.364000 | 30.764000 | 29.160000 |
| 2230 | 2230 | 2025-06-01 22:00:00+00:00 | 39.219500 | 12.0 | 4.618772 | 35.940290 | 2.0 | 0.0 | 0.0 | 0.0 | ... | 222.01846 | 217.71605 | 34.059998 | 33.809998 | 32.764000 | 42.963997 | 40.814 | 35.913998 | 30.814001 | 28.080000 |
| 2231 | 2231 | 2025-06-01 23:00:00+00:00 | 38.669502 | 12.0 | 4.195249 | 33.902687 | 3.0 | 0.0 | 0.0 | 0.0 | ... | 182.62234 | 216.38445 | 32.859997 | 32.609997 | 32.764000 | 42.413998 | 40.364 | 36.314000 | 30.864000 | 36.360000 |
2232 rows × 43 columns
In [2]:
# Remove Duplicate data from raw table
df1 = df1.drop_duplicates()
# Drop/remove all entries with blanks
# df.dropna(subset='temperature_2m', inplace=True)
# df.dropna(subset='relative_humidity_2m', inplace=True)
df1.dropna(inplace=True)
In [3]:
# Set to display all columns
pd.set_option('display.max_columns',None)
plt.rcParams['figure.figsize'] = (12, 8)
# Check whether there's still any blank rows/columns
df1[df1.isna().any(axis=1)]
Out[3]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m |
|---|
In [4]:
# Add Column 'Location'
df1['Location'] = 'Las Vegas'
# Split the Date into Year, Month and Date
df1[['ymd','hmstz']] = df1['date'].str.split(' ', expand=True)
df1[['year','month','day']] = df1['ymd'].str.split('-', expand=True)
df1[['hour','minute','sectz','tzsec']] = df1['hmstz'].str.split(':', expand=True)
In [5]:
df1
Out[5]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 437 | 437 | 2025-03-19 05:00:00+00:00 | 14.459500 | 14.0 | -12.757421 | 8.626808 | 0.0 | 0.0 | 0.0 | 0.0 | 1024.6 | 953.57040 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 16.343367 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.151726 | 1.417936 | 22.768398 | 24.519993 | 29.555100 | 352.405430 | 18.435053 | 19.249622 | 55.922764 | 12.460000 | 12.360001 | 11.264000 | 9.714001 | 12.814000 | 16.114000 | 15.314000 | 22.680000 | Las Vegas | 2025-03-19 | 05:00:00+00:00 | 2025 | 03 | 19 | 05 | 00 | 00+00 | 00 |
| 438 | 438 | 2025-03-19 06:00:00+00:00 | 13.459500 | 17.0 | -11.145491 | 8.152900 | 0.0 | 0.0 | 0.0 | 0.0 | 1025.4 | 954.07745 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 13.493999 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.120954 | 1.282614 | 23.565567 | 26.150720 | 30.500309 | 43.919170 | 18.711840 | 19.708078 | 59.500250 | 12.010000 | 11.960000 | 10.364000 | 9.264000 | 12.414001 | 15.714001 | 15.364000 | 24.840000 | Las Vegas | 2025-03-19 | 06:00:00+00:00 | 2025 | 03 | 19 | 06 | 00 | 00+00 | 00 |
| 439 | 439 | 2025-03-19 07:00:00+00:00 | 12.659500 | 20.0 | -9.752880 | 7.904532 | 0.0 | 0.0 | 0.0 | 0.0 | 1026.2 | 954.63040 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 10.440000 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.090544 | 1.173374 | 21.900904 | 23.996807 | 23.277834 | 46.397110 | 25.301346 | 25.387870 | 58.276447 | 11.460000 | 11.410000 | 9.464001 | 8.164001 | 11.614000 | 15.264000 | 15.364000 | 19.080000 | Las Vegas | 2025-03-19 | 07:00:00+00:00 | 2025 | 03 | 19 | 07 | 00 | 00+00 | 00 |
| 440 | 440 | 2025-03-19 08:00:00+00:00 | 12.159500 | 21.0 | -9.549829 | 7.835932 | 0.0 | 0.0 | 0.0 | 0.0 | 1026.8 | 955.06824 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 7.594208 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.065000 | 1.121307 | 17.873556 | 19.561127 | 15.629971 | 58.570484 | 27.597204 | 28.009092 | 61.073680 | 10.760000 | 10.760000 | 8.864000 | 6.914000 | 10.764000 | 14.764000 | 15.364000 | 12.959999 | Las Vegas | 2025-03-19 | 08:00:00+00:00 | 2025 | 03 | 19 | 08 | 00 | 00+00 | 00 |
| 441 | 441 | 2025-03-19 09:00:00+00:00 | 11.559500 | 22.0 | -9.461175 | 7.294112 | 0.0 | 0.0 | 0.0 | 0.0 | 1027.4 | 955.48145 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 7.244860 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.059745 | 1.064184 | 17.114204 | 19.654087 | 8.913181 | 63.435013 | 22.249071 | 20.806877 | 46.636536 | 10.460000 | 10.460000 | 8.564000 | 5.864000 | 9.864000 | 14.264000 | 15.364000 | 11.520000 | Las Vegas | 2025-03-19 | 09:00:00+00:00 | 2025 | 03 | 19 | 09 | 00 | 00+00 | 00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 2227 | 2025-06-01 19:00:00+00:00 | 39.169502 | 11.0 | 3.341017 | 36.008010 | 1.0 | 0.0 | 0.0 | 0.0 | 1005.9 | 941.46960 | 100.0 | 0.0 | 1.0 | 100.0 | 90000.0 | 23.950148 | 0.032 | 0.076 | 0.105 | 0.103 | 0.110 | -0.0 | 0.0 | 0.0 | 0.989649 | 6.279035 | 26.729847 | 42.661774 | 31.433586 | 222.563310 | 224.454400 | 169.077240 | 159.904680 | 34.609997 | 34.359997 | 32.564000 | 45.114000 | 40.564000 | 33.864000 | 30.714000 | 27.359999 | Las Vegas | 2025-06-01 | 19:00:00+00:00 | 2025 | 06 | 01 | 19 | 00 | 00+00 | 00 |
| 2228 | 2228 | 2025-06-01 20:00:00+00:00 | 39.219500 | 11.0 | 3.379128 | 35.056602 | 1.0 | 0.0 | 0.0 | 0.0 | 1005.2 | 940.82416 | 100.0 | 0.0 | 0.0 | 100.0 | 90000.0 | 29.686360 | 0.032 | 0.076 | 0.105 | 0.103 | 0.110 | -0.0 | 0.0 | 0.0 | 1.043472 | 6.295875 | 33.092594 | 36.379044 | 31.926540 | 194.036270 | 194.489800 | 179.421300 | 187.124920 | 35.059998 | 34.809998 | 32.963997 | 44.114000 | 40.564000 | 34.663998 | 30.764000 | 33.480000 | Las Vegas | 2025-06-01 | 20:00:00+00:00 | 2025 | 06 | 01 | 20 | 00 | 00+00 | 00 |
| 2229 | 2229 | 2025-06-01 21:00:00+00:00 | 39.269500 | 12.0 | 4.657262 | 36.113987 | 2.0 | 0.0 | 0.0 | 0.0 | 1004.7 | 940.36640 | 100.0 | 0.0 | 5.0 | 100.0 | 90000.0 | 25.499080 | 0.031 | 0.076 | 0.105 | 0.102 | 0.110 | -0.0 | 0.0 | 0.0 | 1.006306 | 6.241590 | 28.585450 | 46.478695 | 30.993471 | 210.547500 | 211.087430 | 235.304780 | 210.735410 | 34.809998 | 34.559998 | 32.864000 | 44.364000 | 40.814000 | 35.364000 | 30.764000 | 29.160000 | Las Vegas | 2025-06-01 | 21:00:00+00:00 | 2025 | 06 | 01 | 21 | 00 | 00+00 | 00 |
| 2230 | 2230 | 2025-06-01 22:00:00+00:00 | 39.219500 | 12.0 | 4.618772 | 35.940290 | 2.0 | 0.0 | 0.0 | 0.0 | 1003.7 | 939.42020 | 55.0 | 0.0 | 0.0 | 55.0 | 90000.0 | 24.485292 | 0.031 | 0.076 | 0.105 | 0.102 | 0.110 | -0.0 | 0.0 | 0.0 | 0.951399 | 6.224906 | 27.267473 | 49.953790 | 34.131718 | 221.423570 | 222.324510 | 222.018460 | 217.716050 | 34.059998 | 33.809998 | 32.764000 | 42.963997 | 40.814000 | 35.913998 | 30.814001 | 28.080000 | Las Vegas | 2025-06-01 | 22:00:00+00:00 | 2025 | 06 | 01 | 22 | 00 | 00+00 | 00 |
| 2231 | 2231 | 2025-06-01 23:00:00+00:00 | 38.669502 | 12.0 | 4.195249 | 33.902687 | 3.0 | 0.0 | 0.0 | 0.0 | 1003.6 | 939.21796 | 76.0 | 0.0 | 0.0 | 76.0 | 90000.0 | 29.810522 | 0.031 | 0.076 | 0.105 | 0.102 | 0.110 | -0.0 | 0.0 | 0.0 | 0.912528 | 6.043872 | 35.188046 | 48.185936 | 33.985218 | 227.447080 | 228.317870 | 182.622340 | 216.384450 | 32.859997 | 32.609997 | 32.764000 | 42.413998 | 40.364000 | 36.314000 | 30.864000 | 36.360000 | Las Vegas | 2025-06-01 | 23:00:00+00:00 | 2025 | 06 | 01 | 23 | 00 | 00+00 | 00 |
1795 rows × 53 columns
In [6]:
# Find out the correlation among Columns to filter what Columns we'll use for analysis
# For first glance, we filter out by common logic, what Columns are possible to have strong enough correlations.
# We choose a bunch of Columns specified with Index numbers.
df1h = df1[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m',
'dew_point_2m','vapour_pressure_deficit','temperature_2m','cloud_cover_high','cloud_cover_low','wind_speed_10m',
'soil_moisture_3_to_9cm','temperature_80m','soil_temperature_0cm']]
df1h
Out[6]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 437 | 0.0 | 0.0 | 0.038 | 0.067 | 14.0 | -12.757421 | 1.417936 | 14.459500 | 0.0 | 0.0 | 16.343367 | 0.100 | 12.460000 | 9.714001 |
| 438 | 0.0 | 0.0 | 0.038 | 0.067 | 17.0 | -11.145491 | 1.282614 | 13.459500 | 0.0 | 0.0 | 13.493999 | 0.100 | 12.010000 | 9.264000 |
| 439 | 0.0 | 0.0 | 0.038 | 0.067 | 20.0 | -9.752880 | 1.173374 | 12.659500 | 0.0 | 0.0 | 10.440000 | 0.100 | 11.460000 | 8.164001 |
| 440 | 0.0 | 0.0 | 0.038 | 0.067 | 21.0 | -9.549829 | 1.121307 | 12.159500 | 0.0 | 0.0 | 7.594208 | 0.100 | 10.760000 | 6.914000 |
| 441 | 0.0 | 0.0 | 0.038 | 0.067 | 22.0 | -9.461175 | 1.064184 | 11.559500 | 0.0 | 0.0 | 7.244860 | 0.100 | 10.460000 | 5.864000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 1.0 | 1.0 | 0.032 | 0.076 | 11.0 | 3.341017 | 6.279035 | 39.169502 | 100.0 | 0.0 | 23.950148 | 0.105 | 34.609997 | 45.114000 |
| 2228 | 1.0 | 0.0 | 0.032 | 0.076 | 11.0 | 3.379128 | 6.295875 | 39.219500 | 100.0 | 0.0 | 29.686360 | 0.105 | 35.059998 | 44.114000 |
| 2229 | 2.0 | 5.0 | 0.031 | 0.076 | 12.0 | 4.657262 | 6.241590 | 39.269500 | 100.0 | 0.0 | 25.499080 | 0.105 | 34.809998 | 44.364000 |
| 2230 | 2.0 | 0.0 | 0.031 | 0.076 | 12.0 | 4.618772 | 6.224906 | 39.219500 | 55.0 | 0.0 | 24.485292 | 0.105 | 34.059998 | 42.963997 |
| 2231 | 3.0 | 0.0 | 0.031 | 0.076 | 12.0 | 4.195249 | 6.043872 | 38.669502 | 76.0 | 0.0 | 29.810522 | 0.105 | 32.859997 | 42.413998 |
1795 rows × 14 columns
In [7]:
# Create a Function to generate sorted, limited Correlation Table of a certain Column/parameter
def gencorr(param):
df1hcorr = df1h.corr(numeric_only=True).abs().sort_values(by=param, ascending=False)
df1hcorr = df1hcorr.where(df1hcorr[param] > 0.5)
df1hcorr = df1hcorr.dropna()
corresult = df1hcorr[param].round(2)
return corresult
In [8]:
# Generate Correlation Table for 'temperature_2m'
gencorr('temperature_2m')
Out[8]:
temperature_2m 1.00 vapour_pressure_deficit 0.96 temperature_80m 0.93 soil_temperature_0cm 0.84 relative_humidity_2m 0.57 Name: temperature_2m, dtype: float64
In [9]:
# Draw plot of the most correlated column (vapour_pressure_deficit) with 'temperature_2m' with dual y-axis plot.
# The 'temperature_2m' and 'vapour_pressure_deficit' fluctuations for the most part are similar except at several points.
df04 = df1.where(df1['month'] == '04').dropna()
df05 = df1.where(df1['month'] == '05').dropna()
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('temperature_2m vs vapour_pressure_deficit in April 2025')
ax1.plot(df04[['temperature_2m']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['vapour_pressure_deficit']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('temperature_2m vs vapour_pressure_deficit in May 2025')
ax1.plot(df05[['temperature_2m']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['vapour_pressure_deficit']], color='green', label='Data 2')
Out[9]:
[<matplotlib.lines.Line2D at 0x1c817958ce0>]
In [10]:
# Draw plot of the most correlated columns (soil_temperature_0cm) with 'temperature_2m' in single y-axis plot.
# For the most part of the Plot, the fluctuation of the 'soil_temperature_0cm' is similar except on several points.
df1[['temperature_2m','soil_temperature_0cm']].plot(subplots=[('temperature_2m','soil_temperature_0cm')])
Out[10]:
array([<Axes: >], dtype=object)
In [11]:
# Generate Correlation Table for 'relative_humidity_2m'
gencorr('relative_humidity_2m')
Out[11]:
relative_humidity_2m 1.00 dew_point_2m 0.73 soil_moisture_0_to_1cm 0.68 vapour_pressure_deficit 0.66 cloud_cover_low 0.64 temperature_80m 0.59 temperature_2m 0.57 precipitation_probability 0.56 Name: relative_humidity_2m, dtype: float64
In [12]:
# Draw plot of the most correlated column (dew_point_2m) with 'relative_humidity_2m'
# The relative_humidity_2m values fluctuations for the most part are similar to dew_point_2m except at several points.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('relative_humidity_2m vs dew_point_2m in April 2025')
ax1.plot(df04[['relative_humidity_2m']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['dew_point_2m']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('relative_humidity_2m vs dew_point_2m in May 2025')
ax1.plot(df05[['relative_humidity_2m']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['dew_point_2m']], color='green', label='Data 2')
Out[12]:
[<matplotlib.lines.Line2D at 0x1c817d9a390>]
In [13]:
# Draw plot of the most correlated column (soil_moisture_0_to_1cm) with 'relative_humidity_2m'
# The relative_humidity_2m values fluctuations for the most part are similar to dew_point_2m except at several points.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('relative_humidity_2m vs soil_moisture_0_to_1cm in April 2025')
ax1.plot(df04[['relative_humidity_2m']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['soil_moisture_0_to_1cm']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('relative_humidity_2m vs soil_moisture_0_to_1cm in May 2025')
ax1.plot(df05[['relative_humidity_2m']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['soil_moisture_0_to_1cm']], color='green', label='Data 2')
Out[13]:
[<matplotlib.lines.Line2D at 0x1c818798c80>]
In [14]:
# Generate Correlation Table for 'dew_point_2m'
gencorr('dew_point_2m')
Out[14]:
dew_point_2m 1.00 relative_humidity_2m 0.73 soil_moisture_0_to_1cm 0.55 soil_moisture_1_to_3cm 0.51 Name: dew_point_2m, dtype: float64
In [15]:
# Draw plot of the most correlated column (soil_moisture_0_to_1cm) with 'dew_point_2m'
# The dew_point_2m values fluctuations for the more than halfpart are similar to soil_moisture_0_to_1cm.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('dew_point_2m vs soil_moisture_0_to_1cm in April 2025')
ax1.plot(df04[['dew_point_2m']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['soil_moisture_0_to_1cm']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('dew_point_2m vs soil_moisture_0_to_1cm in May 2025')
ax1.plot(df05[['dew_point_2m']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['soil_moisture_0_to_1cm']], color='green', label='Data 2')
Out[15]:
[<matplotlib.lines.Line2D at 0x1c81a2fd370>]
In [16]:
# Generate Correlation Table for 'precipitation_probability'
gencorr('precipitation_probability')
Out[16]:
precipitation_probability 1.00 cloud_cover_low 0.61 relative_humidity_2m 0.56 Name: precipitation_probability, dtype: float64
In [17]:
# Draw plot of the most correlated column (cloud_cover_low) with 'precipitation_probability'
# The dew_point_2m values fluctuations for the more than halfpart are similar to soil_moisture_0_to_1cm.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('precipitation_probability vs cloud_cover_low in April 2025')
ax1.plot(df04[['precipitation_probability']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['cloud_cover_low']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('precipitation_probability vs cloud_cover_low in May 2025')
ax1.plot(df05[['precipitation_probability']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['cloud_cover_low']], color='green', label='Data 2')
Out[17]:
[<matplotlib.lines.Line2D at 0x1c81a8d55e0>]
In [18]:
# Generate Correlation Table for 'cloud_cover_low'
gencorr('cloud_cover_low')
Out[18]:
cloud_cover_low 1.00 relative_humidity_2m 0.64 precipitation_probability 0.61 soil_moisture_0_to_1cm 0.50 Name: cloud_cover_low, dtype: float64
In [19]:
# Draw plot of the most correlated column (relative_humidity_2m) with 'cloud_cover_low'
# The 'cloud_cover_low' and 'relative_humidity_2m' fluctuations for more than half part are similar except at several points.
import matplotlib.pyplot as plt
df04[['cloud_cover_low','relative_humidity_2m']].plot(title='cloud_cover_low vs relative_humidity_2m in April 2025')
df05[['cloud_cover_low','relative_humidity_2m']].plot(title='cloud_cover_low vs relative_humidity_2m in May 2025')
Out[19]:
<Axes: title={'center': 'cloud_cover_low vs relative_humidity_2m in May 2025'}>
In [20]:
# Generate Correlation Table for 'cloud_cover_mid'
gencorr('cloud_cover_mid')
Out[20]:
cloud_cover_mid 1.0 Name: cloud_cover_mid, dtype: float64
In [21]:
# Generate Correlation Table for 'cloud_cover_high'
gencorr('cloud_cover_high')
Out[21]:
cloud_cover_high 1.0 Name: cloud_cover_high, dtype: float64
In [22]:
# Generate Correlation Table for 'wind_speed_10m'
gencorr('wind_speed_10m')
Out[22]:
wind_speed_10m 1.0 Name: wind_speed_10m, dtype: float64
In [23]:
# Generate Correlation Table for 'soil_moisture_0_to_1cm'
gencorr('soil_moisture_0_to_1cm')
Out[23]:
soil_moisture_0_to_1cm 1.00 soil_moisture_1_to_3cm 0.71 relative_humidity_2m 0.68 soil_moisture_3_to_9cm 0.60 dew_point_2m 0.55 cloud_cover_low 0.50 Name: soil_moisture_0_to_1cm, dtype: float64
In [24]:
# Draw plot of the most correlated column (soil_moisture_1_to_3cm) with 'soil_moisture_0_to_1cm'
# The 'soil_moisture_0_to_1cm' and 'soil_moisture_1_to_3cm' fluctuations for more than half part are similar.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['soil_moisture_0_to_1cm','soil_moisture_1_to_3cm']].plot(title='soil_moisture_0_to_1cm vs soil_moisture_1_to_3cm in April 2025')
df05[['soil_moisture_0_to_1cm','soil_moisture_1_to_3cm']].plot(title='soil_moisture_0_to_1cm vs soil_moisture_1_to_3cm in May 2025')
Out[24]:
<Axes: title={'center': 'soil_moisture_0_to_1cm vs soil_moisture_1_to_3cm in May 2025'}>
In [25]:
# Draw plot of the most correlated column (soil_moisture_3_to_9cm) with 'soil_moisture_0_to_1cm'
# The 'soil_moisture_0_to_1cm' and 'soil_moisture_3_to_9cm' fluctuations for more than half part are similar.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['soil_moisture_0_to_1cm','soil_moisture_3_to_9cm']].plot(title='soil_moisture_0_to_1cm vs soil_moisture_3_to_9cm in April 2025')
df05[['soil_moisture_0_to_1cm','soil_moisture_3_to_9cm']].plot(title='soil_moisture_0_to_1cm vs soil_moisture_3_to_9cm in May 2025')
Out[25]:
<Axes: title={'center': 'soil_moisture_0_to_1cm vs soil_moisture_3_to_9cm in May 2025'}>
In [26]:
# Generate Correlation Table for 'temperature_80m'
gencorr('temperature_80m')
Out[26]:
temperature_80m 1.00 temperature_2m 0.93 vapour_pressure_deficit 0.90 soil_temperature_0cm 0.68 relative_humidity_2m 0.59 Name: temperature_80m, dtype: float64
In [27]:
# Draw plot of the most correlated column with 'temperature_80m'
# The 'temperature_80m' and 'temperature_2m' fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['temperature_80m','temperature_2m']].plot(title='temperature_80m vs temperature_2m in April 2025')
df05[['temperature_80m','temperature_2m']].plot(title='temperature_80m vs temperature_2m in May 2025')
Out[27]:
<Axes: title={'center': 'temperature_80m vs temperature_2m in May 2025'}>
In [28]:
# Draw plot of the most correlated column (cloud_cover_low) with 'temperature_80m'
# The vapour_pressure_deficit values fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('temperature_80m vs vapour_pressure_deficit in April 2025')
ax1.plot(df04[['temperature_80m']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['vapour_pressure_deficit']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('temperature_80m vs vapour_pressure_deficit in May 2025')
ax1.plot(df05[['temperature_80m']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['vapour_pressure_deficit']], color='green', label='Data 2')
Out[28]:
[<matplotlib.lines.Line2D at 0x1c818823470>]
In [29]:
# Draw plot of the most correlated column with 'temperature_80m'
# The 'temperature_80m' and 'soil_temperature_0cm' fluctuations for the most part are similar except at several points.
import matplotlib.pyplot as plt
# plt.title('temperature_80m vs soil_temperature_0cm in April 2025')
df04[['temperature_80m','soil_temperature_0cm']].plot(title='temperature_80m vs soil_temperature_0cm in April 2025')
df05[['temperature_80m','soil_temperature_0cm']].plot(title='temperature_80m vs soil_temperature_0cm in May 2025')
Out[29]:
<Axes: title={'center': 'temperature_80m vs soil_temperature_0cm in May 2025'}>
In [30]:
# Generate Correlation Table for 'soil_temperature_0cm'
gencorr('soil_temperature_0cm')
Out[30]:
soil_temperature_0cm 1.00 temperature_2m 0.84 vapour_pressure_deficit 0.80 temperature_80m 0.68 Name: soil_temperature_0cm, dtype: float64
In [31]:
# Draw plot of the most correlated column (vapour_pressure_deficit) with 'soil_temperature_0cm'
# The soil_temperature_0cm and vapour_pressure_deficit values fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('soil_temperature_0cm vs vapour_pressure_deficit in April 2025')
ax1.plot(df04[['soil_temperature_0cm']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['vapour_pressure_deficit']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('soil_temperature_0cm vs vapour_pressure_deficit in May 2025')
ax1.plot(df05[['soil_temperature_0cm']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['vapour_pressure_deficit']], color='green', label='Data 2')
Out[31]:
[<matplotlib.lines.Line2D at 0x1c81bd86ff0>]
In [32]:
# Generate Correlation Table for 'soil_moisture_1_to_3cm'
gencorr('soil_moisture_1_to_3cm')
Out[32]:
soil_moisture_1_to_3cm 1.00 soil_moisture_3_to_9cm 0.95 soil_moisture_0_to_1cm 0.71 dew_point_2m 0.51 Name: soil_moisture_1_to_3cm, dtype: float64
In [33]:
# Draw plot of the most correlated column with 'soil_moisture_1_to_3cm'
# The 'soil_moisture_1_to_3cm' and 'soil_moisture_3_to_9cm' fluctuations for the most part are similar except at very few points.
import matplotlib.pyplot as plt
# plt.title('dew_point_2m vs temperature_2m in April 2025')
df04[['soil_moisture_1_to_3cm','soil_moisture_3_to_9cm']].plot(title='soil_moisture_1_to_3cm vs soil_moisture_3_to_9cm in April 2025')
df05[['soil_moisture_1_to_3cm','soil_moisture_3_to_9cm']].plot(title='soil_moisture_1_to_3cm vs soil_moisture_3_to_9cm in May 2025')
Out[33]:
<Axes: title={'center': 'soil_moisture_1_to_3cm vs soil_moisture_3_to_9cm in May 2025'}>
In [34]:
# Generate Correlation Table for 'soil_moisture_3_to_9cm'
gencorr('soil_moisture_3_to_9cm')
Out[34]:
soil_moisture_3_to_9cm 1.00 soil_moisture_1_to_3cm 0.95 soil_moisture_0_to_1cm 0.60 Name: soil_moisture_3_to_9cm, dtype: float64
In [35]:
# Generate Correlation Table for 'vapour_pressure_deficit'
gencorr('vapour_pressure_deficit')
Out[35]:
vapour_pressure_deficit 1.00 temperature_2m 0.96 temperature_80m 0.90 soil_temperature_0cm 0.80 relative_humidity_2m 0.66 Name: vapour_pressure_deficit, dtype: float64
In [36]:
# Draw plot of the most correlated column (vapour_pressure_deficit) with 'relative_humidity_2m'
# The vapour_pressure_deficit and relative_humidity_2m values fluctuations for more than half part are good reverse.
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots()
plt.title('vapour_pressure_deficit vs relative_humidity_2m in April 2025')
ax1.plot(df04[['vapour_pressure_deficit']], color='blue')
ax2 = ax1.twinx()
ax2.plot(df04[['relative_humidity_2m']], color='green', label='Data 2')
fig, ax1 = plt.subplots()
plt.title('vapour_pressure_deficit vs relative_humidity_2m in May 2025')
ax1.plot(df05[['vapour_pressure_deficit']], color='blue', label='Data 1')
ax2 = ax1.twinx()
ax2.plot(df05[['relative_humidity_2m']], color='green', label='Data 2')
Out[36]:
[<matplotlib.lines.Line2D at 0x1c81dd623f0>]
In [ ]:
In [37]:
# Draw the Heatmap of Weather Data Correlation
import seaborn as sns
import matplotlib.pyplot as plt
sns.heatmap(df1h.corr(numeric_only=True), annot=True)
plt.rcParams['figure.figsize'] = (20,10)
plt.show()
In [38]:
# How about if we limit the 'temperature_2m' to certain range, let's say, more than 18C
# Data for 4 Columns where temperature_2m more than 18 C
df1h2 = df1h.where(df1h['temperature_2m'] > 18)
df1h2 = df1h2.dropna()
df1h2
Out[38]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 479 | 0.0 | 0.0 | 0.037 | 0.067 | 12.0 | -11.587272 | 1.859420 | 18.359500 | 100.0 | 0.0 | 4.104631 | 0.099 | 17.410000 | 26.514000 |
| 480 | 0.0 | 0.0 | 0.038 | 0.069 | 12.0 | -11.116996 | 1.930476 | 18.959500 | 100.0 | 0.0 | 3.563818 | 0.101 | 17.960001 | 23.164000 |
| 481 | 0.0 | 0.0 | 0.038 | 0.069 | 11.0 | -11.971617 | 1.989208 | 19.259500 | 100.0 | 0.0 | 2.545584 | 0.101 | 18.110000 | 19.364000 |
| 482 | 0.0 | 0.0 | 0.038 | 0.069 | 11.0 | -11.894015 | 2.001618 | 19.359500 | 93.0 | 0.0 | 4.349896 | 0.101 | 18.010000 | 15.264000 |
| 483 | 0.0 | 0.0 | 0.038 | 0.069 | 11.0 | -11.971617 | 1.989208 | 19.259500 | 46.0 | 0.0 | 6.120000 | 0.101 | 17.810000 | 13.264000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 1.0 | 1.0 | 0.032 | 0.076 | 11.0 | 3.341017 | 6.279035 | 39.169502 | 100.0 | 0.0 | 23.950148 | 0.105 | 34.609997 | 45.114000 |
| 2228 | 1.0 | 0.0 | 0.032 | 0.076 | 11.0 | 3.379128 | 6.295875 | 39.219500 | 100.0 | 0.0 | 29.686360 | 0.105 | 35.059998 | 44.114000 |
| 2229 | 2.0 | 5.0 | 0.031 | 0.076 | 12.0 | 4.657262 | 6.241590 | 39.269500 | 100.0 | 0.0 | 25.499080 | 0.105 | 34.809998 | 44.364000 |
| 2230 | 2.0 | 0.0 | 0.031 | 0.076 | 12.0 | 4.618772 | 6.224906 | 39.219500 | 55.0 | 0.0 | 24.485292 | 0.105 | 34.059998 | 42.963997 |
| 2231 | 3.0 | 0.0 | 0.031 | 0.076 | 12.0 | 4.195249 | 6.043872 | 38.669502 | 76.0 | 0.0 | 29.810522 | 0.105 | 32.859997 | 42.413998 |
1329 rows × 14 columns
In [ ]:
In [39]:
sns.heatmap(df1h2.corr(numeric_only=True), annot=True)
plt.rcParams['figure.figsize'] = (20,10)
plt.show()
In [40]:
######### Conclusion from the Heatmap ##############
# 'relative_humidity_2m' has correlation of -0.86 to 'vapour_pressure_deficit'.
# 'temperature_80m' has correlation of 0.92 to 'soil_temperature_6cm'.
# 'temperature_80m' has correlation of 0.86 to 'soil_temperature_0cm'.
# 'temperature_2m' has correlation of 0.86 to 'soil_temperature_0cm'.
# 'dew_point_2m' has correlation of 0.81 to 'relative_humidity_2m' parameter if temperature_2m > 18C
# 'cloud_cover_mid' has correlation of 0.51 to 'precipitation_probability'.
In [41]:
# LAB : Check 'corr' method of Pandas with mathematical high order polynomial equations (quadratic to power-of-5 equations)
# Equations used:
# 1. y1 = 2x^2 + 5x + 6
# 2. y2 = 2x^3 + 7x^2 + 10x + 20
# 3. y3 = 5x^4 + 12x^3 + 8x^2 + 6x + 20
# 4. y4 = 3x^5 + 8x^4 + 10x^3 + 4x^2 + 9x + 17
dfq = pd.read_csv(r'C:\PAULDATA\Paulus_Data_Analyst\Portfolio_Projects\Global_Weather\quadratic_equation.csv')
dfq
Out[41]:
| x | y1 | y2 | y3 | y4 | |
|---|---|---|---|---|---|
| 0 | 1 | 13 | 39 | 51 | 51 |
| 1 | 2 | 24 | 84 | 240 | 355 |
| 2 | 3 | 39 | 167 | 839 | 1727 |
| 3 | 4 | 58 | 300 | 2220 | 5877 |
| 4 | 5 | 81 | 495 | 4875 | 15787 |
| 5 | 6 | 108 | 764 | 9416 | 36071 |
| 6 | 7 | 139 | 1119 | 16575 | 73335 |
| 7 | 8 | 174 | 1572 | 27204 | 136537 |
| 8 | 9 | 213 | 2135 | 42275 | 237347 |
| 9 | 10 | 256 | 2820 | 62880 | 390507 |
| 10 | 11 | 303 | 3639 | 90231 | 614191 |
| 11 | 12 | 354 | 4604 | 125660 | 930365 |
| 12 | 13 | 409 | 5727 | 170619 | 1365147 |
| 13 | 14 | 468 | 7020 | 226680 | 1949167 |
| 14 | 15 | 531 | 8495 | 295535 | 2717927 |
| 15 | 16 | 598 | 10164 | 378996 | 3712161 |
| 16 | 17 | 669 | 12039 | 478995 | 4978195 |
| 17 | 18 | 744 | 14132 | 597584 | 6568307 |
In [42]:
sns.heatmap(dfq.corr(numeric_only=True), annot=True)
plt.rcParams['figure.figsize'] = (20,10)
plt.show()
In [43]:
# CONCLUSION:
# 'corr' Method of Pandas is accurate enough until cube equation, but for higher order equation, is much less.
# Correlation value of 0.8 is still can be regarded as correlated enough, due to corr method algorithm limitation.
In [44]:
# The plot of all data of all Columns/parameters
df1
Out[44]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 437 | 437 | 2025-03-19 05:00:00+00:00 | 14.459500 | 14.0 | -12.757421 | 8.626808 | 0.0 | 0.0 | 0.0 | 0.0 | 1024.6 | 953.57040 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 16.343367 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.151726 | 1.417936 | 22.768398 | 24.519993 | 29.555100 | 352.405430 | 18.435053 | 19.249622 | 55.922764 | 12.460000 | 12.360001 | 11.264000 | 9.714001 | 12.814000 | 16.114000 | 15.314000 | 22.680000 | Las Vegas | 2025-03-19 | 05:00:00+00:00 | 2025 | 03 | 19 | 05 | 00 | 00+00 | 00 |
| 438 | 438 | 2025-03-19 06:00:00+00:00 | 13.459500 | 17.0 | -11.145491 | 8.152900 | 0.0 | 0.0 | 0.0 | 0.0 | 1025.4 | 954.07745 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 13.493999 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.120954 | 1.282614 | 23.565567 | 26.150720 | 30.500309 | 43.919170 | 18.711840 | 19.708078 | 59.500250 | 12.010000 | 11.960000 | 10.364000 | 9.264000 | 12.414001 | 15.714001 | 15.364000 | 24.840000 | Las Vegas | 2025-03-19 | 06:00:00+00:00 | 2025 | 03 | 19 | 06 | 00 | 00+00 | 00 |
| 439 | 439 | 2025-03-19 07:00:00+00:00 | 12.659500 | 20.0 | -9.752880 | 7.904532 | 0.0 | 0.0 | 0.0 | 0.0 | 1026.2 | 954.63040 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 10.440000 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.090544 | 1.173374 | 21.900904 | 23.996807 | 23.277834 | 46.397110 | 25.301346 | 25.387870 | 58.276447 | 11.460000 | 11.410000 | 9.464001 | 8.164001 | 11.614000 | 15.264000 | 15.364000 | 19.080000 | Las Vegas | 2025-03-19 | 07:00:00+00:00 | 2025 | 03 | 19 | 07 | 00 | 00+00 | 00 |
| 440 | 440 | 2025-03-19 08:00:00+00:00 | 12.159500 | 21.0 | -9.549829 | 7.835932 | 0.0 | 0.0 | 0.0 | 0.0 | 1026.8 | 955.06824 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 7.594208 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.065000 | 1.121307 | 17.873556 | 19.561127 | 15.629971 | 58.570484 | 27.597204 | 28.009092 | 61.073680 | 10.760000 | 10.760000 | 8.864000 | 6.914000 | 10.764000 | 14.764000 | 15.364000 | 12.959999 | Las Vegas | 2025-03-19 | 08:00:00+00:00 | 2025 | 03 | 19 | 08 | 00 | 00+00 | 00 |
| 441 | 441 | 2025-03-19 09:00:00+00:00 | 11.559500 | 22.0 | -9.461175 | 7.294112 | 0.0 | 0.0 | 0.0 | 0.0 | 1027.4 | 955.48145 | 0.0 | 0.0 | 0.0 | 0.0 | 24140.0 | 7.244860 | 0.038 | 0.067 | 0.100 | 0.113 | 0.113 | 0.0 | 0.0 | 0.0 | 0.059745 | 1.064184 | 17.114204 | 19.654087 | 8.913181 | 63.435013 | 22.249071 | 20.806877 | 46.636536 | 10.460000 | 10.460000 | 8.564000 | 5.864000 | 9.864000 | 14.264000 | 15.364000 | 11.520000 | Las Vegas | 2025-03-19 | 09:00:00+00:00 | 2025 | 03 | 19 | 09 | 00 | 00+00 | 00 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 2227 | 2227 | 2025-06-01 19:00:00+00:00 | 39.169502 | 11.0 | 3.341017 | 36.008010 | 1.0 | 0.0 | 0.0 | 0.0 | 1005.9 | 941.46960 | 100.0 | 0.0 | 1.0 | 100.0 | 90000.0 | 23.950148 | 0.032 | 0.076 | 0.105 | 0.103 | 0.110 | -0.0 | 0.0 | 0.0 | 0.989649 | 6.279035 | 26.729847 | 42.661774 | 31.433586 | 222.563310 | 224.454400 | 169.077240 | 159.904680 | 34.609997 | 34.359997 | 32.564000 | 45.114000 | 40.564000 | 33.864000 | 30.714000 | 27.359999 | Las Vegas | 2025-06-01 | 19:00:00+00:00 | 2025 | 06 | 01 | 19 | 00 | 00+00 | 00 |
| 2228 | 2228 | 2025-06-01 20:00:00+00:00 | 39.219500 | 11.0 | 3.379128 | 35.056602 | 1.0 | 0.0 | 0.0 | 0.0 | 1005.2 | 940.82416 | 100.0 | 0.0 | 0.0 | 100.0 | 90000.0 | 29.686360 | 0.032 | 0.076 | 0.105 | 0.103 | 0.110 | -0.0 | 0.0 | 0.0 | 1.043472 | 6.295875 | 33.092594 | 36.379044 | 31.926540 | 194.036270 | 194.489800 | 179.421300 | 187.124920 | 35.059998 | 34.809998 | 32.963997 | 44.114000 | 40.564000 | 34.663998 | 30.764000 | 33.480000 | Las Vegas | 2025-06-01 | 20:00:00+00:00 | 2025 | 06 | 01 | 20 | 00 | 00+00 | 00 |
| 2229 | 2229 | 2025-06-01 21:00:00+00:00 | 39.269500 | 12.0 | 4.657262 | 36.113987 | 2.0 | 0.0 | 0.0 | 0.0 | 1004.7 | 940.36640 | 100.0 | 0.0 | 5.0 | 100.0 | 90000.0 | 25.499080 | 0.031 | 0.076 | 0.105 | 0.102 | 0.110 | -0.0 | 0.0 | 0.0 | 1.006306 | 6.241590 | 28.585450 | 46.478695 | 30.993471 | 210.547500 | 211.087430 | 235.304780 | 210.735410 | 34.809998 | 34.559998 | 32.864000 | 44.364000 | 40.814000 | 35.364000 | 30.764000 | 29.160000 | Las Vegas | 2025-06-01 | 21:00:00+00:00 | 2025 | 06 | 01 | 21 | 00 | 00+00 | 00 |
| 2230 | 2230 | 2025-06-01 22:00:00+00:00 | 39.219500 | 12.0 | 4.618772 | 35.940290 | 2.0 | 0.0 | 0.0 | 0.0 | 1003.7 | 939.42020 | 55.0 | 0.0 | 0.0 | 55.0 | 90000.0 | 24.485292 | 0.031 | 0.076 | 0.105 | 0.102 | 0.110 | -0.0 | 0.0 | 0.0 | 0.951399 | 6.224906 | 27.267473 | 49.953790 | 34.131718 | 221.423570 | 222.324510 | 222.018460 | 217.716050 | 34.059998 | 33.809998 | 32.764000 | 42.963997 | 40.814000 | 35.913998 | 30.814001 | 28.080000 | Las Vegas | 2025-06-01 | 22:00:00+00:00 | 2025 | 06 | 01 | 22 | 00 | 00+00 | 00 |
| 2231 | 2231 | 2025-06-01 23:00:00+00:00 | 38.669502 | 12.0 | 4.195249 | 33.902687 | 3.0 | 0.0 | 0.0 | 0.0 | 1003.6 | 939.21796 | 76.0 | 0.0 | 0.0 | 76.0 | 90000.0 | 29.810522 | 0.031 | 0.076 | 0.105 | 0.102 | 0.110 | -0.0 | 0.0 | 0.0 | 0.912528 | 6.043872 | 35.188046 | 48.185936 | 33.985218 | 227.447080 | 228.317870 | 182.622340 | 216.384450 | 32.859997 | 32.609997 | 32.764000 | 42.413998 | 40.364000 | 36.314000 | 30.864000 | 36.360000 | Las Vegas | 2025-06-01 | 23:00:00+00:00 | 2025 | 06 | 01 | 23 | 00 | 00+00 | 00 |
1795 rows × 53 columns
In [45]:
# Try to find the other Columns value when the 'temperature_2m' is maximum
#temperature_2m_max = df[df1.columns[1]].max() # OR temperature_2m_max = df['temperature_2m'].max()
temperature_2m_max = df1['temperature_2m'].max()
print(temperature_2m_max)
df1ht2mmax = df1.where(df1['temperature_2m'] == temperature_2m_max)
df1ht2mmax = df1ht2mmax.dropna()
df1ht2mmax[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
41.3195
Out[45]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 2206 | 2025-05-31 22:00:00+00:00 | 41.3195 | 7.0 | -1.346894 | 0.0 | 0.0 | 46.0 | 0.0 | 5.116561 | 0.031 | 0.077 | 0.106 | 7.356871 | 37.809998 | 50.064 |
In [46]:
# Try to find the other Columns value when the 'temperature_2m' is minimum
temperature_2m_min = df1['temperature_2m'].min()
print(temperature_2m_min)
df1ht2mmin = df1.where(df1['temperature_2m'] == temperature_2m_min)
df1ht2mmin = df1ht2mmin.dropna()
df1ht2mmin[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
6.4195
Out[46]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 805 | 2025-04-03 13:00:00+00:00 | 6.4195 | 62.0 | -0.337105 | 0.0 | 38.0 | 42.0 | 0.0 | 4.379589 | 0.073 | 0.041 | 0.093 | 0.366641 | 7.41 | 6.114 |
In [47]:
# Find values when the 'precipitation_probability' is Maximum
precipitation_probability_max = df1['precipitation_probability'].max()
print(precipitation_probability_max)
df1hppmax = df1.where(df1['precipitation_probability'] == precipitation_probability_max).sort_values(by = ['cloud_cover_mid','soil_moisture_0_to_1cm'], ascending=[False,False])
df1hppmax = df1hppmax.dropna()
df1hppmax[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm',
'cloud_cover_low','cloud_cover_high','wind_speed_10m','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
70.0
Out[47]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | cloud_cover_low | cloud_cover_high | wind_speed_10m | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1583 | 2025-05-05 23:00:00+00:00 | 15.3695 | 93.0 | 14.243151 | 70.0 | 80.0 | 0.264 | 75.0 | 60.0 | 12.682018 | 0.135 | 0.137 | 0.122335 | 14.21 | 17.564001 |
In [48]:
# Find values when the 'precipitation_probability' is 50%
df1hpp50 = df1.where(df1['precipitation_probability'] == 50).sort_values(by = ['cloud_cover_mid'], ascending=[False])
df1hpp50 = df1hpp50.dropna()
df1hpp50[['date','temperature_2m','relative_humidity_2m','dew_point_2m','precipitation_probability','cloud_cover_mid','cloud_cover_low',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
Out[48]:
| date | temperature_2m | relative_humidity_2m | dew_point_2m | precipitation_probability | cloud_cover_mid | cloud_cover_low | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm |
|---|
In [49]:
# Find values when the 'precipitation_probability' is Minimum
precipitation_probability_min = df1['precipitation_probability'].min()
print(precipitation_probability_min)
df1hppmin = df1.where(df1['precipitation_probability'] == precipitation_probability_min).sort_values(by = ['cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[True,True,True,True,True,True,True,True]).head(5)
df1hppmin = df1hppmin.dropna()
df1hppmin[['date','temperature_2m','precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm',
'soil_moisture_3_to_9cm','relative_humidity_2m','cloud_cover_high','dew_point_2m','vapour_pressure_deficit',
'cloud_cover_low','wind_speed_10m','soil_moisture_1_to_3cm','temperature_80m','soil_temperature_0cm']]
0.0
Out[49]:
| date | temperature_2m | precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_3_to_9cm | relative_humidity_2m | cloud_cover_high | dew_point_2m | vapour_pressure_deficit | cloud_cover_low | wind_speed_10m | soil_moisture_1_to_3cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 468 | 2025-03-20 12:00:00+00:00 | 12.3595 | 0.0 | -1.0 | 0.038 | 0.100 | 14.0 | 100.0 | -14.438404 | 1.236642 | 0.0 | 6.109403 | 0.067 | 11.910000 | 5.614 |
| 1009 | 2025-04-12 01:00:00+00:00 | 34.8695 | 0.0 | 0.0 | 0.030 | 0.088 | 6.0 | 0.0 | -8.026972 | 5.248589 | 0.0 | 8.311245 | 0.043 | 33.359997 | 33.064 |
| 1010 | 2025-04-12 02:00:00+00:00 | 32.5195 | 0.0 | 0.0 | 0.030 | 0.088 | 7.0 | 0.0 | -7.733895 | 4.554785 | 0.0 | 17.709658 | 0.043 | 32.110000 | 28.664 |
| 1006 | 2025-04-11 22:00:00+00:00 | 35.0195 | 0.0 | 0.0 | 0.030 | 0.088 | 7.0 | 0.0 | -5.913356 | 5.235899 | 0.0 | 18.875126 | 0.043 | 31.960001 | 42.764 |
| 1007 | 2025-04-11 23:00:00+00:00 | 35.3695 | 0.0 | 0.0 | 0.030 | 0.088 | 7.0 | 0.0 | -5.658859 | 5.337968 | 0.0 | 15.077082 | 0.043 | 32.710000 | 40.264 |
In [111]:
# Try to find the other Columns value when the 'relative_humidity_2m' is maximum
relative_humidity_2m_max = df1['relative_humidity_2m'].max()
print(relative_humidity_2m_max)
df1hrh2mmax = df1.where(df1['relative_humidity_2m'] == relative_humidity_2m_max)
df1hrh2mmax = df1hrh2mmax.dropna()
df1hrh2mmax[['date','relative_humidity_2m','temperature_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']]
96.0
Out[111]:
| date | relative_humidity_2m | temperature_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1597 | 2025-05-06 13:00:00+00:00 | 96.0 | 14.3695 | 13.739599 | 4.0 | 100.0 | 34.0 | 0.0 | 3.219938 | 0.206 | 0.204 | 0.158 | 0.065552 | 13.51 | 14.064 |
In [ ]:
# Unique Fact: when the 'relative_humidity_2m' reach Maximum value, the 'precipitation_probability' reach almost zero percent (4%).
# This contradicts common logic, which suggests high humidity increases precipitation chances.
In [112]:
# Find values when the 'relative_humidity_2m' is 50%
df1hrh50 = df1.where(df1['relative_humidity_2m'] == 50).sort_values(by = ['precipitation_probability'], ascending=[False])
df1hrh50 = df1hrh50.dropna()
df1hrh50[['date','relative_humidity_2m','temperature_2m','vapour_pressure_deficit','dew_point_2m','precipitation_probability','cloud_cover_mid','cloud_cover_low',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','temperature_80m','soil_temperature_0cm']]
Out[112]:
| date | relative_humidity_2m | temperature_2m | vapour_pressure_deficit | dew_point_2m | precipitation_probability | cloud_cover_mid | cloud_cover_low | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 788 | 2025-04-02 20:00:00+00:00 | 50.0 | 11.819500 | 0.694066 | 1.725359 | 15.0 | 100.0 | 13.0 | 0.0 | 23.977188 | 0.096 | 0.058 | 0.094 | 9.860001 | 25.864000 |
| 1612 | 2025-05-07 04:00:00+00:00 | 50.0 | 16.469501 | 0.937701 | 6.011217 | 10.0 | 99.0 | 0.0 | 0.0 | 21.933426 | 0.232 | 0.178 | 0.174 | 14.960000 | 15.864000 |
| 783 | 2025-04-02 15:00:00+00:00 | 50.0 | 8.869500 | 0.570095 | -0.996837 | 6.0 | 100.0 | 100.0 | 0.0 | 29.810522 | 0.043 | 0.058 | 0.094 | 8.810000 | 12.464001 |
| 1604 | 2025-05-06 20:00:00+00:00 | 50.0 | 20.019500 | 1.171222 | 9.279046 | 6.0 | 100.0 | 100.0 | 0.0 | 8.647496 | 0.158 | 0.186 | 0.161 | 19.110000 | 25.214000 |
| 1603 | 2025-05-06 19:00:00+00:00 | 50.0 | 21.669500 | 1.296059 | 10.796674 | 5.0 | 13.0 | 20.0 | 0.0 | 8.145870 | 0.171 | 0.189 | 0.161 | 18.010000 | 24.714000 |
| 1641 | 2025-05-08 09:00:00+00:00 | 50.0 | 17.769500 | 1.017986 | 7.208303 | 0.0 | 0.0 | 0.0 | 0.0 | 7.091177 | 0.129 | 0.180 | 0.191 | 22.660000 | 17.414000 |
In [ ]:
# Conclusion: when 'relative_humidity_2m' reaches 50%, the highest 'precipitation_probability' is 15%.
# This is less than half the 'precipitation_probability' in Las Vegas.
In [113]:
# Try to find the other Columns value when the 'relative_humidity_2m' is minimum
relative_humidity_2m_min = df1['relative_humidity_2m'].min()
print(relative_humidity_2m_min)
df1hrh2mmin = df1.where(df1['relative_humidity_2m'] == relative_humidity_2m_min)
df1hrh2mmin = df1hrh2mmin.dropna()
df1hrh2mmin[['date','relative_humidity_2m','temperature_2m','dew_point_2m','precipitation_probability','cloud_cover_low','cloud_cover_mid',
'cloud_cover_high','wind_speed_10m','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm',
'soil_moisture_3_to_9cm','vapour_pressure_deficit','temperature_80m','soil_temperature_0cm']].sort_values(by='precipitation_probability', ascending=True)
4.0
Out[113]:
| date | relative_humidity_2m | temperature_2m | dew_point_2m | precipitation_probability | cloud_cover_low | cloud_cover_mid | cloud_cover_high | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | vapour_pressure_deficit | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1990 | 2025-05-22 22:00:00+00:00 | 4.0 | 37.869500 | -11.091969 | 0.0 | 0.0 | 0.0 | 0.0 | 26.525429 | 0.033 | 0.088 | 0.115 | 6.316258 | 34.059998 | 43.814000 |
| 1991 | 2025-05-22 23:00:00+00:00 | 4.0 | 38.069500 | -10.955633 | 0.0 | 0.0 | 0.0 | 0.0 | 25.864943 | 0.033 | 0.088 | 0.115 | 6.384925 | 34.109997 | 41.514000 |
| 1992 | 2025-05-23 00:00:00+00:00 | 4.0 | 37.519500 | -11.330648 | 0.0 | 0.0 | 0.0 | 0.0 | 27.059933 | 0.033 | 0.088 | 0.115 | 6.197625 | 34.410000 | 38.963997 |
| 2015 | 2025-05-23 23:00:00+00:00 | 4.0 | 36.619500 | -11.944863 | 0.0 | 0.0 | 0.0 | 0.0 | 25.772078 | 0.033 | 0.086 | 0.113 | 5.901392 | 33.010000 | 41.064000 |
| 2017 | 2025-05-24 01:00:00+00:00 | 4.0 | 35.419502 | -12.764890 | 0.0 | 0.0 | 0.0 | 0.0 | 25.704100 | 0.033 | 0.086 | 0.113 | 5.525542 | 32.960000 | 35.413998 |
| 2018 | 2025-05-24 02:00:00+00:00 | 4.0 | 34.469500 | -13.414946 | 0.0 | 0.0 | 0.0 | 0.0 | 21.129883 | 0.033 | 0.086 | 0.113 | 5.242886 | 32.960000 | 31.914000 |
In [ ]:
# Conclusion:
# - when the 'relative_humidity_2m' reaches minimum (4%), the 'precipitation_probability' reaches minimum also (0%).
# - when the 'relative_humidity_2m' reaches minimum (4%), the 'dew_point_2m' reaches very low also (-13.4C) (lower than Nashville, -3.68C).
In [50]:
# Find Daily Average of Precipitation Probability (Table)
dfda1 = df1.groupby('ymd')[df1h.columns[0:]].mean().sort_values('precipitation_probability', ascending=False).head(5)
dfda1
Out[50]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ymd | ||||||||||||||
| 2025-05-04 | 25.833333 | 90.166667 | 0.068042 | 0.067083 | 46.125000 | 7.227499 | 1.409673 | 20.388250 | 84.416667 | 41.875000 | 13.252176 | 0.104875 | 19.235000 | 23.645250 |
| 2025-05-06 | 16.041667 | 46.166667 | 0.198167 | 0.192750 | 80.708333 | 12.630033 | 0.416490 | 16.373667 | 4.083333 | 77.791667 | 5.971521 | 0.154250 | 15.266250 | 18.634834 |
| 2025-05-05 | 15.083333 | 87.833333 | 0.088417 | 0.091833 | 61.750000 | 9.809922 | 0.780684 | 17.438250 | 43.166667 | 47.416667 | 7.254426 | 0.136167 | 15.976667 | 20.030667 |
| 2025-04-02 | 6.541667 | 69.541667 | 0.050708 | 0.058167 | 34.041667 | -4.608082 | 1.001607 | 12.475750 | 0.000000 | 13.583333 | 20.729977 | 0.093875 | 11.568334 | 16.072334 |
| 2025-04-03 | 3.166667 | 56.916667 | 0.073500 | 0.040042 | 41.166667 | -1.681834 | 0.864802 | 11.790333 | 0.000000 | 19.958333 | 8.770723 | 0.093000 | 9.916250 | 15.389000 |
In [51]:
# Find Daily Average of relative_humidity_2m (Table)
dfda1 = df1.groupby('ymd')[df1h.columns[[4,0]]].mean().sort_values('relative_humidity_2m', ascending=False).head(5)
dfda1
Out[51]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| ymd | ||
| 2025-05-06 | 80.708333 | 16.041667 |
| 2025-05-05 | 61.750000 | 15.083333 |
| 2025-05-07 | 47.625000 | 2.750000 |
| 2025-05-04 | 46.125000 | 25.833333 |
| 2025-04-03 | 41.166667 | 3.166667 |
In [52]:
# Find Daily Average of Precipitation Probability (Plot)
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20, 10) # Set global size for all plots
dfda2 = df1.groupby('ymd')[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].mean().sort_values('ymd', ascending=True)
dfda2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[52]:
array([<Axes: xlabel='ymd'>, <Axes: xlabel='ymd'>], dtype=object)
In [107]:
# Find Daily Average of Relative Humidity 2m (Table)
dfda3 = df1.groupby('ymd')[df1h.columns[[4,0]]].mean().sort_values('relative_humidity_2m', ascending=False).head(10)
dfda3
Out[107]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| ymd | ||
| 2025-05-06 | 80.708333 | 16.041667 |
| 2025-05-05 | 61.750000 | 15.083333 |
| 2025-05-07 | 47.625000 | 2.750000 |
| 2025-05-04 | 46.125000 | 25.833333 |
| 2025-04-03 | 41.166667 | 3.166667 |
| 2025-05-08 | 35.125000 | 0.000000 |
| 2025-04-02 | 34.041667 | 6.541667 |
| 2025-05-18 | 33.125000 | 2.708333 |
| 2025-04-27 | 31.916667 | 0.708333 |
| 2025-03-31 | 30.166667 | 0.000000 |
In [53]:
# Find Monthly Average values of Precipitation Probability (Table)
dfma1 = df1.groupby(['year','month'])[df1h.columns[0:]].mean().sort_values(by = ['precipitation_probability','cloud_cover_mid',
'soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m','vapour_pressure_deficit'],
ascending=[False,False,False,False,False,False,False,False,False,False])
dfma1
Out[53]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | month | ||||||||||||||
| 2025 | 05 | 2.291667 | 19.543011 | 0.055297 | 0.104656 | 21.094086 | -1.432394 | 3.013983 | 26.219500 | 11.091398 | 5.444892 | 12.789477 | 0.129192 | 25.585807 | 28.517293 |
| 06 | 0.708333 | 49.541667 | 0.031708 | 0.077000 | 10.916667 | 0.225036 | 5.343179 | 35.834084 | 64.125000 | 0.000000 | 17.594226 | 0.106000 | 33.482916 | 36.422333 | |
| 04 | 0.644444 | 16.631944 | 0.035333 | 0.046068 | 17.704167 | -6.591822 | 2.215045 | 20.750542 | 11.543056 | 1.559722 | 13.329188 | 0.088725 | 20.237639 | 23.047889 | |
| 03 | 0.110749 | 16.781759 | 0.034495 | 0.064528 | 19.198697 | -4.984482 | 1.991165 | 19.878979 | 34.921824 | 0.032573 | 11.694789 | 0.097678 | 19.683290 | 20.777681 |
In [54]:
# Find Monthly Average values of Precipitation Probability (Plot)
dfma2 = df1.groupby(['year','month'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].mean().sort_values(by = ['year', 'month'], ascending=[True,True])
# dfma2.plot(subplots=[('precipitation_probability', 'soil_moisture_0_to_1cm')])
dfma2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[54]:
array([<Axes: xlabel='year,month'>, <Axes: xlabel='year,month'>],
dtype=object)
In [108]:
# Find Monthly Average values of Relative Humidity 2m (Table)
dfma3 = df1.groupby(['year','month'])[df1h.columns[[4,0]]].mean().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfma3
Out[108]:
| relative_humidity_2m | precipitation_probability | ||
|---|---|---|---|
| year | month | ||
| 2025 | 05 | 21.094086 | 2.291667 |
| 03 | 19.198697 | 0.110749 | |
| 04 | 17.704167 | 0.644444 | |
| 06 | 10.916667 | 0.708333 |
In [55]:
# Find Hourly Average values of Precipitation Probability (Table)
dfha1 = df1.groupby(['hmstz'])[df1h.columns[0:]].mean().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfha1
Out[55]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| hmstz | ||||||||||||||
| 23:00:00+00:00 | 2.106667 | 24.253333 | 0.044000 | 0.073373 | 13.706667 | -4.714941 | 3.636448 | 28.481367 | 22.386667 | 2.720000 | 16.456923 | 0.106573 | 25.761333 | 34.534000 |
| 22:00:00+00:00 | 1.920000 | 21.093333 | 0.042773 | 0.073333 | 14.213333 | -4.141996 | 3.557681 | 28.212700 | 21.266667 | 2.946667 | 15.882570 | 0.106680 | 25.510667 | 36.917999 |
| 00:00:00+00:00 | 1.662162 | 22.432432 | 0.042459 | 0.073824 | 13.270270 | -4.899519 | 3.613658 | 28.393149 | 15.648649 | 2.229730 | 15.621791 | 0.107784 | 25.958649 | 31.574810 |
| 21:00:00+00:00 | 1.586667 | 19.106667 | 0.042973 | 0.073427 | 14.866667 | -3.568545 | 3.451940 | 27.828034 | 19.640000 | 4.266667 | 16.420983 | 0.106840 | 25.036667 | 38.148666 |
| 19:00:00+00:00 | 1.466667 | 16.306667 | 0.043933 | 0.073627 | 16.560000 | -2.963268 | 3.088773 | 26.308034 | 15.986667 | 2.640000 | 13.408874 | 0.107000 | 23.536667 | 37.547332 |
| 06:00:00+00:00 | 1.413333 | 19.693333 | 0.043480 | 0.074360 | 18.973333 | -4.692392 | 2.273213 | 21.802034 | 18.626667 | 1.480000 | 11.827843 | 0.107427 | 22.652000 | 19.536000 |
| 05:00:00+00:00 | 1.413333 | 17.186667 | 0.043453 | 0.074373 | 18.146667 | -4.985937 | 2.407996 | 22.539367 | 17.093333 | 2.040000 | 12.258795 | 0.107413 | 23.248667 | 19.642667 |
| 20:00:00+00:00 | 1.413333 | 16.506667 | 0.043480 | 0.073507 | 15.346667 | -3.434113 | 3.303648 | 27.206034 | 16.466667 | 4.920000 | 15.200141 | 0.106933 | 24.416000 | 38.527999 |
| 16:00:00+00:00 | 1.400000 | 11.626667 | 0.045493 | 0.073733 | 23.000000 | -1.966810 | 2.220290 | 22.006700 | 11.786667 | 4.493333 | 11.209813 | 0.107187 | 20.135334 | 27.214667 |
| 01:00:00+00:00 | 1.364865 | 22.135135 | 0.041703 | 0.073824 | 13.040541 | -5.624604 | 3.526115 | 27.933014 | 14.027027 | 3.040541 | 15.342573 | 0.107676 | 25.972162 | 28.274811 |
| 17:00:00+00:00 | 1.253333 | 12.826667 | 0.045400 | 0.073640 | 20.746667 | -2.113576 | 2.504790 | 23.600700 | 12.226667 | 4.546667 | 12.077289 | 0.107067 | 21.408667 | 31.439333 |
| 15:00:00+00:00 | 1.186667 | 15.160000 | 0.044147 | 0.073853 | 25.333333 | -2.089175 | 1.907255 | 20.053367 | 14.413333 | 5.333333 | 10.679532 | 0.107227 | 18.944000 | 21.740667 |
| 04:00:00+00:00 | 1.148649 | 19.527027 | 0.043095 | 0.073892 | 16.472973 | -5.860641 | 2.637508 | 23.703284 | 17.229730 | 1.418919 | 12.263542 | 0.107581 | 24.139730 | 20.843054 |
| 18:00:00+00:00 | 1.146667 | 15.560000 | 0.044920 | 0.073693 | 18.640000 | -2.310659 | 2.796328 | 25.056700 | 12.040000 | 1.960000 | 13.473858 | 0.107133 | 22.498667 | 35.103333 |
| 02:00:00+00:00 | 1.108108 | 20.959459 | 0.042000 | 0.073865 | 13.594595 | -6.073569 | 3.278218 | 26.678284 | 13.000000 | 2.594595 | 14.039328 | 0.107649 | 25.663378 | 24.609270 |
| 07:00:00+00:00 | 1.106667 | 18.240000 | 0.043573 | 0.074533 | 20.893333 | -4.227496 | 2.083288 | 20.730700 | 20.053333 | 1.226667 | 11.549302 | 0.107413 | 22.136667 | 18.688667 |
| 14:00:00+00:00 | 1.093333 | 18.653333 | 0.043573 | 0.073960 | 26.840000 | -2.922083 | 1.634909 | 17.983367 | 14.360000 | 4.600000 | 10.348632 | 0.107240 | 18.409334 | 16.935334 |
| 03:00:00+00:00 | 1.067568 | 18.702703 | 0.041608 | 0.073892 | 14.743243 | -6.412960 | 2.891001 | 24.889771 | 16.189189 | 1.405405 | 12.525557 | 0.107608 | 24.941081 | 22.227514 |
| 13:00:00+00:00 | 1.040000 | 16.733333 | 0.043413 | 0.074093 | 26.906667 | -3.631291 | 1.525281 | 17.000034 | 15.266667 | 4.266667 | 10.705742 | 0.107293 | 18.731334 | 15.197334 |
| 08:00:00+00:00 | 1.013333 | 20.546667 | 0.043373 | 0.074467 | 21.693333 | -4.419502 | 1.980057 | 20.010034 | 17.120000 | 0.480000 | 12.706578 | 0.107333 | 21.585334 | 17.969334 |
| 12:00:00+00:00 | 0.920000 | 16.946667 | 0.043400 | 0.074200 | 25.133333 | -4.083975 | 1.616041 | 17.612700 | 14.466667 | 1.906667 | 11.465712 | 0.107333 | 19.197333 | 15.551334 |
| 10:00:00+00:00 | 0.693333 | 18.960000 | 0.043240 | 0.074200 | 23.333333 | -4.477055 | 1.762027 | 18.589367 | 15.733333 | 3.440000 | 10.635036 | 0.107320 | 20.378667 | 16.764667 |
| 11:00:00+00:00 | 0.640000 | 19.173333 | 0.043200 | 0.074133 | 24.120000 | -4.359386 | 1.691183 | 18.083367 | 13.786667 | 2.240000 | 11.644047 | 0.107227 | 19.796667 | 16.214667 |
| 09:00:00+00:00 | 0.520000 | 17.133333 | 0.043213 | 0.074333 | 22.666667 | -4.233425 | 1.848442 | 19.193367 | 16.506667 | 3.066667 | 11.518742 | 0.107333 | 21.007334 | 17.353334 |
In [56]:
# Find Hourly Average values of Precipitation Probability (Plot)
dfha2 = df1.groupby(['hmstz'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].mean().sort_values(by = ['hmstz'], ascending=[True])
dfha2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[56]:
array([<Axes: xlabel='hmstz'>, <Axes: xlabel='hmstz'>], dtype=object)
In [109]:
# Find Hourly Average values of Relative Humidity 2m (Table)
dfha3 = df1.groupby(['hmstz'])[df1h.columns[[4,0]]].mean().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfha3
Out[109]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| hmstz | ||
| 13:00:00+00:00 | 26.906667 | 1.040000 |
| 14:00:00+00:00 | 26.840000 | 1.093333 |
| 15:00:00+00:00 | 25.333333 | 1.186667 |
| 12:00:00+00:00 | 25.133333 | 0.920000 |
| 11:00:00+00:00 | 24.120000 | 0.640000 |
| 10:00:00+00:00 | 23.333333 | 0.693333 |
| 16:00:00+00:00 | 23.000000 | 1.400000 |
| 09:00:00+00:00 | 22.666667 | 0.520000 |
| 08:00:00+00:00 | 21.693333 | 1.013333 |
| 07:00:00+00:00 | 20.893333 | 1.106667 |
| 17:00:00+00:00 | 20.746667 | 1.253333 |
| 06:00:00+00:00 | 18.973333 | 1.413333 |
| 18:00:00+00:00 | 18.640000 | 1.146667 |
| 05:00:00+00:00 | 18.146667 | 1.413333 |
| 19:00:00+00:00 | 16.560000 | 1.466667 |
| 04:00:00+00:00 | 16.472973 | 1.148649 |
| 20:00:00+00:00 | 15.346667 | 1.413333 |
| 21:00:00+00:00 | 14.866667 | 1.586667 |
| 03:00:00+00:00 | 14.743243 | 1.067568 |
| 22:00:00+00:00 | 14.213333 | 1.920000 |
| 23:00:00+00:00 | 13.706667 | 2.106667 |
| 02:00:00+00:00 | 13.594595 | 1.108108 |
| 00:00:00+00:00 | 13.270270 | 1.662162 |
| 01:00:00+00:00 | 13.040541 | 1.364865 |
In [57]:
# Observation Result:
# - The fluctuation of average relative humidity has small impact to soil moisture. Maybe, it's due to partial condensation
# phenomenon, so when the precipitation probability is relatively stable (almost flat), the soil moisture is still fluctuating.
# This phenomenon is used in Fog Farming to supply fresh water in arid/semi-arid area by using a lot of nets to help water vapour condensation.
In [58]:
# Find Daily Maximum of Precipitation Probability (Table)
dfdmax1 = df1.groupby('ymd')[df1h.columns[0:]].max().sort_values('precipitation_probability', ascending=False).head(5)
dfdmax1
Out[58]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ymd | ||||||||||||||
| 2025-05-05 | 70.0 | 100.0 | 0.264 | 0.135 | 93.0 | 14.243151 | 1.271882 | 20.6695 | 99.0 | 100.0 | 16.923830 | 0.138 | 17.960001 | 24.264000 |
| 2025-05-06 | 54.0 | 100.0 | 0.245 | 0.206 | 96.0 | 14.011498 | 1.296059 | 21.6695 | 98.0 | 100.0 | 18.971008 | 0.161 | 22.160000 | 27.664000 |
| 2025-05-04 | 47.0 | 100.0 | 0.181 | 0.072 | 76.0 | 11.103669 | 2.930076 | 27.6695 | 99.0 | 100.0 | 27.776047 | 0.112 | 26.560000 | 31.664000 |
| 2025-04-02 | 23.0 | 100.0 | 0.117 | 0.060 | 59.0 | 2.182480 | 1.845239 | 18.4195 | 0.0 | 100.0 | 37.664326 | 0.094 | 15.560000 | 25.864000 |
| 2025-05-03 | 20.0 | 99.0 | 0.042 | 0.051 | 24.0 | 5.358303 | 4.346065 | 32.2695 | 100.0 | 0.0 | 41.180286 | 0.085 | 29.610000 | 41.463997 |
In [59]:
# Find Daily Maximum of Precipitation Probability (Plot)
dfdmax2 = df1.groupby('ymd')[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].max().sort_values('ymd', ascending=True)
dfdmax2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[59]:
array([<Axes: xlabel='ymd'>, <Axes: xlabel='ymd'>], dtype=object)
In [60]:
# Find Daily Maximum of Relative Humidity 2m (Table)
dfdmax2 = df1.groupby('ymd')[df1h.columns[[4,0]]].max().sort_values('relative_humidity_2m', ascending=False).head(10)
dfdmax2
Out[60]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| ymd | ||
| 2025-05-06 | 96.0 | 54.0 |
| 2025-05-05 | 93.0 | 70.0 |
| 2025-05-04 | 76.0 | 47.0 |
| 2025-05-07 | 69.0 | 10.0 |
| 2025-04-03 | 63.0 | 11.0 |
| 2025-05-08 | 59.0 | 0.0 |
| 2025-04-02 | 59.0 | 23.0 |
| 2025-05-18 | 55.0 | 20.0 |
| 2025-04-27 | 45.0 | 4.0 |
| 2025-03-31 | 43.0 | 0.0 |
In [61]:
# Find Monthly Maximum values of Precipitation Probability (Table)
dfmmax1 = df1.groupby(['year','month'])[df1h.columns[0:]].max().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmmax1
Out[61]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | month | ||||||||||||||
| 2025 | 05 | 70.0 | 100.0 | 0.264 | 0.245 | 96.0 | 14.243151 | 7.356871 | 41.3195 | 100.0 | 100.0 | 47.619442 | 0.194 | 38.010000 | 51.213997 |
| 04 | 23.0 | 100.0 | 0.121 | 0.060 | 63.0 | 3.911096 | 5.367447 | 35.4695 | 100.0 | 100.0 | 40.352130 | 0.095 | 33.359997 | 44.814000 | |
| 03 | 3.0 | 100.0 | 0.047 | 0.069 | 43.0 | 4.301982 | 4.619241 | 32.7695 | 100.0 | 5.0 | 45.504055 | 0.101 | 30.310000 | 41.064000 | |
| 06 | 3.0 | 100.0 | 0.032 | 0.078 | 15.0 | 4.657262 | 7.146035 | 40.7695 | 100.0 | 0.0 | 29.810522 | 0.107 | 38.309998 | 45.614000 |
In [62]:
# Find Monthly Maximum values of Precipitation Probability (Plot)
dfmmax2 = df1.groupby(['year','month'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].max().sort_values(by = ['year','month'], ascending=[True,True])
dfmmax2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[62]:
array([<Axes: xlabel='year,month'>, <Axes: xlabel='year,month'>],
dtype=object)
In [63]:
# Find Monthly Maximum values of Relative Humidity 2m (Table)
dfmmax2 = df1.groupby(['year','month'])[df1h.columns[[4,0]]].max().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfmmax2
Out[63]:
| relative_humidity_2m | precipitation_probability | ||
|---|---|---|---|
| year | month | ||
| 2025 | 05 | 96.0 | 70.0 |
| 04 | 63.0 | 23.0 | |
| 03 | 43.0 | 3.0 | |
| 06 | 15.0 | 3.0 |
In [64]:
# Find Hourly Maximum values of Precipitation Probability (Table)
dfhmax1 = df1.groupby(['hmstz'])[df1h.columns[0:]].max().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfhmax1
Out[64]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| hmstz | ||||||||||||||
| 23:00:00+00:00 | 70.0 | 100.0 | 0.264 | 0.196 | 93.0 | 14.243151 | 7.012977 | 40.619500 | 100.0 | 98.0 | 44.554268 | 0.193 | 38.010000 | 47.564000 |
| 06:00:00+00:00 | 54.0 | 100.0 | 0.257 | 0.232 | 91.0 | 13.314399 | 5.457564 | 36.169502 | 100.0 | 67.0 | 29.723444 | 0.192 | 33.960000 | 31.814001 |
| 05:00:00+00:00 | 53.0 | 100.0 | 0.263 | 0.220 | 91.0 | 12.968334 | 5.441759 | 35.719500 | 100.0 | 100.0 | 47.165497 | 0.192 | 35.260000 | 30.914000 |
| 22:00:00+00:00 | 51.0 | 100.0 | 0.184 | 0.199 | 70.0 | 12.657084 | 7.356871 | 41.319500 | 100.0 | 56.0 | 43.565945 | 0.193 | 37.809998 | 50.064000 |
| 16:00:00+00:00 | 47.0 | 100.0 | 0.219 | 0.222 | 85.0 | 13.694188 | 5.022630 | 35.269500 | 100.0 | 100.0 | 36.086296 | 0.193 | 31.360000 | 38.163998 |
| 00:00:00+00:00 | 46.0 | 100.0 | 0.238 | 0.195 | 83.0 | 12.350466 | 7.146035 | 40.769500 | 100.0 | 99.0 | 42.682400 | 0.194 | 38.160000 | 45.364000 |
| 15:00:00+00:00 | 46.0 | 100.0 | 0.224 | 0.225 | 94.0 | 14.011498 | 4.482165 | 33.219500 | 100.0 | 100.0 | 37.017033 | 0.193 | 30.260000 | 33.913998 |
| 17:00:00+00:00 | 44.0 | 100.0 | 0.210 | 0.219 | 76.0 | 13.183273 | 5.467945 | 36.819500 | 100.0 | 100.0 | 36.304707 | 0.194 | 33.010000 | 43.014000 |
| 18:00:00+00:00 | 41.0 | 100.0 | 0.199 | 0.215 | 65.0 | 12.128211 | 6.063636 | 38.519500 | 100.0 | 44.0 | 41.945793 | 0.194 | 34.609997 | 46.413998 |
| 19:00:00+00:00 | 38.0 | 100.0 | 0.185 | 0.211 | 55.0 | 10.796674 | 6.279035 | 39.169502 | 100.0 | 96.0 | 47.619442 | 0.194 | 35.760000 | 49.864000 |
| 11:00:00+00:00 | 37.0 | 100.0 | 0.233 | 0.233 | 93.0 | 13.400532 | 3.909449 | 30.869501 | 100.0 | 100.0 | 39.990795 | 0.190 | 30.110000 | 29.164000 |
| 04:00:00+00:00 | 37.0 | 100.0 | 0.232 | 0.200 | 90.0 | 13.095613 | 5.456728 | 35.769500 | 100.0 | 100.0 | 39.246605 | 0.193 | 36.010000 | 32.264000 |
| 12:00:00+00:00 | 35.0 | 100.0 | 0.233 | 0.232 | 91.0 | 13.413273 | 3.844822 | 30.019500 | 100.0 | 100.0 | 40.063644 | 0.190 | 29.410000 | 28.414000 |
| 14:00:00+00:00 | 35.0 | 100.0 | 0.228 | 0.227 | 94.0 | 13.713717 | 4.152573 | 32.269500 | 100.0 | 100.0 | 34.942265 | 0.192 | 28.960001 | 30.814001 |
| 10:00:00+00:00 | 34.0 | 100.0 | 0.237 | 0.236 | 94.0 | 13.614459 | 3.945178 | 31.069500 | 100.0 | 100.0 | 40.609850 | 0.191 | 30.710001 | 30.014000 |
| 07:00:00+00:00 | 33.0 | 100.0 | 0.262 | 0.245 | 94.0 | 13.664090 | 4.659209 | 34.119500 | 100.0 | 90.0 | 37.971040 | 0.192 | 33.059998 | 30.864000 |
| 03:00:00+00:00 | 32.0 | 100.0 | 0.224 | 0.196 | 88.0 | 12.948845 | 6.151083 | 37.769500 | 100.0 | 100.0 | 46.938255 | 0.193 | 38.059998 | 34.614000 |
| 01:00:00+00:00 | 32.0 | 100.0 | 0.205 | 0.192 | 87.0 | 13.019988 | 7.014605 | 40.419502 | 100.0 | 93.0 | 38.813274 | 0.194 | 38.309998 | 42.163998 |
| 21:00:00+00:00 | 27.0 | 100.0 | 0.182 | 0.203 | 59.0 | 11.153346 | 6.922008 | 40.169502 | 100.0 | 100.0 | 44.653060 | 0.193 | 37.410000 | 50.514000 |
| 08:00:00+00:00 | 26.0 | 100.0 | 0.251 | 0.245 | 93.0 | 13.549228 | 4.443540 | 33.269500 | 100.0 | 36.0 | 43.476612 | 0.191 | 32.660000 | 30.864000 |
| 02:00:00+00:00 | 25.0 | 100.0 | 0.245 | 0.190 | 87.0 | 12.970818 | 6.830472 | 39.919502 | 100.0 | 96.0 | 41.531270 | 0.193 | 38.309998 | 38.114000 |
| 13:00:00+00:00 | 23.0 | 100.0 | 0.231 | 0.230 | 96.0 | 13.739599 | 3.685421 | 30.269500 | 100.0 | 100.0 | 45.504055 | 0.191 | 29.360000 | 28.314001 |
| 20:00:00+00:00 | 21.0 | 100.0 | 0.174 | 0.207 | 51.0 | 10.173055 | 6.597355 | 39.269500 | 100.0 | 100.0 | 44.606598 | 0.194 | 36.609997 | 51.213997 |
| 09:00:00+00:00 | 14.0 | 100.0 | 0.242 | 0.241 | 94.0 | 13.664090 | 4.237368 | 32.219500 | 100.0 | 100.0 | 38.479603 | 0.191 | 31.160000 | 30.764000 |
In [65]:
# Find Hourly Maximum values of Precipitation Probability (Plot)
dfhmax2 = df1.groupby(['hmstz'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].max().sort_values(by = ['hmstz'], ascending=[True])
dfhmax2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[65]:
array([<Axes: xlabel='hmstz'>, <Axes: xlabel='hmstz'>], dtype=object)
In [66]:
# Find Hourly Maximum values of Relative Humidity 2m (Table)
dfhmax2 = df1.groupby(['hmstz'])[df1h.columns[[4,0]]].max().sort_values(by = ['relative_humidity_2m'], ascending=[False])
dfhmax2
Out[66]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| hmstz | ||
| 13:00:00+00:00 | 96.0 | 23.0 |
| 09:00:00+00:00 | 94.0 | 14.0 |
| 15:00:00+00:00 | 94.0 | 46.0 |
| 14:00:00+00:00 | 94.0 | 35.0 |
| 10:00:00+00:00 | 94.0 | 34.0 |
| 07:00:00+00:00 | 94.0 | 33.0 |
| 23:00:00+00:00 | 93.0 | 70.0 |
| 11:00:00+00:00 | 93.0 | 37.0 |
| 08:00:00+00:00 | 93.0 | 26.0 |
| 12:00:00+00:00 | 91.0 | 35.0 |
| 06:00:00+00:00 | 91.0 | 54.0 |
| 05:00:00+00:00 | 91.0 | 53.0 |
| 04:00:00+00:00 | 90.0 | 37.0 |
| 03:00:00+00:00 | 88.0 | 32.0 |
| 01:00:00+00:00 | 87.0 | 32.0 |
| 02:00:00+00:00 | 87.0 | 25.0 |
| 16:00:00+00:00 | 85.0 | 47.0 |
| 00:00:00+00:00 | 83.0 | 46.0 |
| 17:00:00+00:00 | 76.0 | 44.0 |
| 22:00:00+00:00 | 70.0 | 51.0 |
| 18:00:00+00:00 | 65.0 | 41.0 |
| 21:00:00+00:00 | 59.0 | 27.0 |
| 19:00:00+00:00 | 55.0 | 38.0 |
| 20:00:00+00:00 | 51.0 | 21.0 |
In [67]:
# Observation Result:
# - When the maximum value of 'relative_humidity_2m' and 'precipitation_probability' decreases, the maximum value of moisture rises. Maybe, it's due to
# the occurence of rain (humidity falls due to part of water vapour turns to liquid, and after that, the precipitation_probability falls also because
# it's already release the water vapour as rain).
In [68]:
# Find Daily Minimum of Precipitation Probability (Table)
dfdmin1 = df1.groupby('ymd')[df1h.columns[0:]].min().sort_values(['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm'], ascending=True).head(5)
dfdmin1
Out[68]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ymd | ||||||||||||||
| 2025-03-20 | 0.0 | -1.0 | 0.037 | 0.067 | 12.0 | -14.492778 | 1.159523 | 11.559500 | 0.0 | 0.0 | 2.160000 | 0.099 | 11.060000 | 5.064 |
| 2025-04-10 | 0.0 | 0.0 | 0.030 | 0.041 | 5.0 | -12.953846 | 1.588309 | 17.269500 | 0.0 | 0.0 | 3.758510 | 0.089 | 22.960001 | 16.014 |
| 2025-04-11 | 0.0 | 0.0 | 0.030 | 0.042 | 5.0 | -16.909462 | 1.542835 | 16.519500 | 0.0 | 0.0 | 3.240000 | 0.088 | 23.060000 | 16.964 |
| 2025-04-12 | 0.0 | 0.0 | 0.030 | 0.043 | 6.0 | -11.360874 | 2.328790 | 22.769500 | 0.0 | 0.0 | 7.491114 | 0.087 | 21.710001 | 18.164 |
| 2025-04-13 | 0.0 | 0.0 | 0.030 | 0.043 | 6.0 | -12.012876 | 1.915399 | 20.369501 | 0.0 | 0.0 | 5.447788 | 0.087 | 19.810000 | 15.514 |
In [69]:
# Find Daily Minimum of Precipitation Probability (Plot)
dfdmin2 = df1.groupby('ymd')[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].min().sort_values('ymd', ascending=True)
dfdmin2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[69]:
array([<Axes: xlabel='ymd'>, <Axes: xlabel='ymd'>], dtype=object)
In [70]:
# Find Daily Minimum of relative_humidity_2m (Table)
dfdmin3 = df1.groupby('ymd')[df1h.columns[[4,0]]].min().sort_values(['relative_humidity_2m'], ascending=True).head(5)
dfdmin3
Out[70]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| ymd | ||
| 2025-05-22 | 4.0 | 0.0 |
| 2025-05-23 | 4.0 | 0.0 |
| 2025-05-24 | 4.0 | 0.0 |
| 2025-04-25 | 5.0 | 0.0 |
| 2025-04-07 | 5.0 | 0.0 |
In [71]:
# Observation Result of Daily Minimum Data
# - Fluctuation of 'relative_humidity_2m' is similar to 'soil_moisture_0_to_1cm', different compared to Hourly Minimum Data,
# maybe due to longer time scope.
In [72]:
# Find Monthly Minimum values of Precipitation Probability (Table)
dfmmin1 = df1.groupby(['year','month'])[df1h.columns[0:]].min().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[True,True,True,True,True,True,True,True,True,True])
dfmmin1
Out[72]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | ||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | month | ||||||||||||||
| 2025 | 03 | 0.0 | -1.0 | 0.031 | 0.060 | 7.0 | -14.492778 | 0.848216 | 9.1595 | 0.0 | 0.0 | 0.509117 | 0.095 | 8.060000 | 3.214000 |
| 04 | 0.0 | 0.0 | 0.030 | 0.033 | 5.0 | -17.648800 | 0.364434 | 6.4195 | 0.0 | 0.0 | 0.509117 | 0.085 | 7.310000 | 6.114000 | |
| 05 | 0.0 | 0.0 | 0.030 | 0.049 | 4.0 | -14.610750 | 0.065552 | 13.2195 | 0.0 | 0.0 | 0.720000 | 0.084 | 12.960000 | 13.114000 | |
| 06 | 0.0 | 0.0 | 0.031 | 0.076 | 6.0 | -5.956471 | 3.654214 | 30.0195 | 0.0 | 0.0 | 9.000000 | 0.105 | 28.960001 | 28.314001 |
In [73]:
# Find Monthly Minimum values of Precipitation Probability (Plot)
dfmmin2 = df1.groupby(['year','month'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].min().sort_values(by = ['year','month'], ascending=[True,True])
dfmmin2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[73]:
array([<Axes: xlabel='year,month'>, <Axes: xlabel='year,month'>],
dtype=object)
In [74]:
# Find Monthly Minimum values of Relative Humidity 2m (Table)
dfmmin2 = df1.groupby(['year','month'])[df1h.columns[[4,0]]].min().sort_values(by = ['relative_humidity_2m'], ascending=[True])
dfmmin2
Out[74]:
| relative_humidity_2m | precipitation_probability | ||
|---|---|---|---|
| year | month | ||
| 2025 | 05 | 4.0 | 0.0 |
| 04 | 5.0 | 0.0 | |
| 06 | 6.0 | 0.0 | |
| 03 | 7.0 | 0.0 |
In [75]:
# Find Hourly Minimum values of Precipitation Probability (Table)
dfhmin1 = df1.groupby(['hmstz'])[df1h.columns[0:]].min().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[True,True,True,True,True,True,True,True,True,True])
dfhmin1
Out[75]:
| precipitation_probability | cloud_cover_mid | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | relative_humidity_2m | dew_point_2m | vapour_pressure_deficit | temperature_2m | cloud_cover_high | cloud_cover_low | wind_speed_10m | soil_moisture_3_to_9cm | temperature_80m | soil_temperature_0cm | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| hmstz | ||||||||||||||
| 12:00:00+00:00 | 0.0 | -1.0 | 0.031 | 0.034 | 8.0 | -14.438404 | 0.152326 | 7.419500 | 0.0 | 0.0 | 2.160000 | 0.085 | 7.510000 | 4.314000 |
| 01:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.033 | 4.0 | -17.574831 | 0.224315 | 14.169499 | 0.0 | 0.0 | 0.509117 | 0.085 | 9.610001 | 16.414000 |
| 02:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.033 | 4.0 | -16.503166 | 0.223596 | 13.869500 | 0.0 | 0.0 | 2.620839 | 0.085 | 10.560000 | 12.514000 |
| 00:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.033 | 4.0 | -14.659351 | 0.294288 | 15.169499 | 0.0 | 0.0 | 2.545584 | 0.085 | 9.960000 | 17.164000 |
| 03:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.033 | 5.0 | -17.648800 | 0.203762 | 12.619500 | 0.0 | 0.0 | 1.484318 | 0.085 | 11.010000 | 10.714001 |
| 04:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.033 | 5.0 | -16.758446 | 0.167628 | 12.369500 | 0.0 | 0.0 | 1.138420 | 0.085 | 11.110001 | 9.614000 |
| 05:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.033 | 6.0 | -14.917891 | 0.147976 | 11.419499 | 0.0 | 0.0 | 2.414954 | 0.085 | 10.560000 | 9.264000 |
| 06:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.033 | 8.0 | -14.170350 | 0.151350 | 10.669499 | 0.0 | 0.0 | 2.620839 | 0.085 | 9.260000 | 8.014000 |
| 23:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.034 | 4.0 | -14.929487 | 0.122335 | 14.959500 | 0.0 | 0.0 | 2.968636 | 0.084 | 10.810000 | 17.564001 |
| 22:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.034 | 4.0 | -13.845597 | 0.628145 | 14.559500 | 0.0 | 0.0 | 2.189795 | 0.084 | 10.960000 | 22.314001 |
| 21:00:00+00:00 | 0.0 | 0.0 | 0.030 | 0.034 | 5.0 | -14.218192 | 0.903590 | 13.319500 | 0.0 | 0.0 | 1.938659 | 0.084 | 10.310000 | 22.714000 |
| 08:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.033 | 5.0 | -16.909462 | 0.116959 | 8.619500 | 0.0 | 0.0 | 1.297998 | 0.085 | 8.110000 | 6.914000 |
| 07:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.033 | 6.0 | -14.856331 | 0.099928 | 9.719500 | 0.0 | 0.0 | 1.297998 | 0.085 | 8.610001 | 7.914000 |
| 09:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.033 | 7.0 | -14.147417 | 0.099928 | 8.119500 | 0.0 | 0.0 | 2.741678 | 0.085 | 7.860000 | 5.864000 |
| 11:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.033 | 8.0 | -14.150644 | 0.115834 | 7.369500 | 0.0 | 0.0 | 1.609969 | 0.085 | 7.610000 | 5.214000 |
| 10:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.033 | 9.0 | -14.670770 | 0.099606 | 8.019500 | 0.0 | 0.0 | 1.440000 | 0.085 | 7.760000 | 5.314000 |
| 20:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 5.0 | -14.367883 | 0.694066 | 11.819500 | 0.0 | 0.0 | 0.804984 | 0.084 | 9.860001 | 22.814001 |
| 19:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 6.0 | -13.942364 | 0.703295 | 11.719500 | 0.0 | 0.0 | 2.160000 | 0.084 | 10.160000 | 23.464000 |
| 18:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 8.0 | -14.892031 | 0.661922 | 11.719500 | 0.0 | 0.0 | 1.835647 | 0.084 | 10.360001 | 20.364000 |
| 17:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 9.0 | -14.362545 | 0.448559 | 10.719500 | 0.0 | 0.0 | 0.509117 | 0.084 | 9.660000 | 18.064001 |
| 15:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 10.0 | -13.997224 | 0.102202 | 8.869500 | 0.0 | 0.0 | 1.138420 | 0.084 | 7.660000 | 7.414000 |
| 16:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 10.0 | -13.957830 | 0.276782 | 9.269500 | 0.0 | 0.0 | 0.720000 | 0.084 | 8.860001 | 13.864000 |
| 14:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 11.0 | -16.555761 | 0.100250 | 6.719500 | 0.0 | 0.0 | 0.720000 | 0.085 | 7.310000 | 3.214000 |
| 13:00:00+00:00 | 0.0 | 0.0 | 0.031 | 0.034 | 11.0 | -15.873536 | 0.065552 | 6.419500 | 0.0 | 0.0 | 1.484318 | 0.085 | 7.410000 | 3.664000 |
In [76]:
# Find Hourly Minimum values of Precipitation Probability (Plot)
dfhmin2 = df1.groupby(['hmstz'])[['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','relative_humidity_2m']].min().sort_values(by = ['hmstz'], ascending=[True])
dfhmin2.plot(subplots=[('precipitation_probability','cloud_cover_mid','relative_humidity_2m'),('soil_moisture_0_to_1cm','soil_moisture_1_to_3cm')])
Out[76]:
array([<Axes: xlabel='hmstz'>, <Axes: xlabel='hmstz'>], dtype=object)
In [77]:
# Find Hourly Minimum values of Relative Humidity 2m (Table)
dfhmin2 = df1.groupby(['hmstz'])[df1h.columns[[4,0]]].min().sort_values(by = ['relative_humidity_2m'], ascending=[True])
dfhmin2
Out[77]:
| relative_humidity_2m | precipitation_probability | |
|---|---|---|
| hmstz | ||
| 00:00:00+00:00 | 4.0 | 0.0 |
| 22:00:00+00:00 | 4.0 | 0.0 |
| 23:00:00+00:00 | 4.0 | 0.0 |
| 01:00:00+00:00 | 4.0 | 0.0 |
| 02:00:00+00:00 | 4.0 | 0.0 |
| 03:00:00+00:00 | 5.0 | 0.0 |
| 04:00:00+00:00 | 5.0 | 0.0 |
| 21:00:00+00:00 | 5.0 | 0.0 |
| 08:00:00+00:00 | 5.0 | 0.0 |
| 20:00:00+00:00 | 5.0 | 0.0 |
| 05:00:00+00:00 | 6.0 | 0.0 |
| 07:00:00+00:00 | 6.0 | 0.0 |
| 19:00:00+00:00 | 6.0 | 0.0 |
| 09:00:00+00:00 | 7.0 | 0.0 |
| 18:00:00+00:00 | 8.0 | 0.0 |
| 11:00:00+00:00 | 8.0 | 0.0 |
| 12:00:00+00:00 | 8.0 | 0.0 |
| 06:00:00+00:00 | 8.0 | 0.0 |
| 17:00:00+00:00 | 9.0 | 0.0 |
| 10:00:00+00:00 | 9.0 | 0.0 |
| 16:00:00+00:00 | 10.0 | 0.0 |
| 15:00:00+00:00 | 10.0 | 0.0 |
| 14:00:00+00:00 | 11.0 | 0.0 |
| 13:00:00+00:00 | 11.0 | 0.0 |
In [78]:
# Observation result:
# - When the minimum value of 'relative_humidity_2m' rises, the min value of 'soil_moisture_0_to_1cm' doesn't immediately rise, but instead, steadily almost flat. Maybe, it's due to part of liquid water in the soil vaporize and change into water vapour, so increase the relative humidity of air (near the soil).
# - When 'relative_humidity_2m' falls, the value of 'soil_moisture_0_to_1cm' doesn't immediately falls, but instead, in contrary, rises big enough. Maybe, it's due to part of water vapour in the air near the soil condensates and add liquid water in the soil so the soil moisture increases.
Mode value of a Column (for sample, we take 'temperature_2m' Column)¶
In [79]:
# Find Mode value if not rounded
dfmod = df1.mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod = dfmod.dropna()
dfmod
# The result is 17.4525 if we don't do any number rounding. But this result in the most cases, maybe is not the best mode at all.
Out[79]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 437 | 2025-03-19 05:00:00+00:00 | 20.8195 | 9.0 | -11.41719 | 1.279143 | 0.0 | 0.0 | 0.0 | 0.0 | 1007.1 | 937.6389 | 0.0 | 0.0 | 0.0 | 0.0 | 90000.0 | 7.42159 | 0.031 | 0.051 | 0.087 | 0.1 | 0.105 | 0.0 | 0.0 | 0.0 | 0.0 | 1.754564 | 3.319036 | 4.42464 | 4.024922 | 135.0001 | 180.0 | 180.0 | 135.0001 | 26.01 | 26.41 | 20.864 | 20.364 | 20.814001 | 23.414 | 25.114 | 10.440001 | Las Vegas | 2025-03-20 | 05:00:00+00:00 | 2025 | 05 | 01 | 05 | 00 | 00+00 | 00 |
In [80]:
dfmoda = df1[(df1['temperature_2m'] >= 17.45245) & (df1['temperature_2m'] < 17.45255)] #.where(df1['temperature_2m'] == 19)
dfmoda = dfmoda.dropna()
print(dfmoda['temperature_2m'])
len(dfmoda)
# The result: the temperature of 17.4525 only occurred 11 times out of 1651 data samples.
Series([], Name: temperature_2m, dtype: float64)
Out[80]:
0
In [81]:
# Find Mode value precise to 0 decimal number
dfmod0 = df1.round(0).mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod0 = dfmod0.dropna()
dfmod0
# The result is 19 if we round it to the nearest integer (0 decimal number)
Out[81]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 437 | 2025-03-19 05:00:00+00:00 | 22.0 | 9.0 | -3.0 | 19.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1008.0 | 940.0 | 0.0 | 0.0 | 0.0 | 0.0 | 90000.0 | 7.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | 7.0 | 6.0 | 7.0 | 135.0 | 180.0 | 180.0 | 223.0 | 27.0 | 26.0 | 25.0 | 18.0 | 22.0 | 23.0 | 23.0 | 10.0 | Las Vegas | 2025-03-20 | 05:00:00+00:00 | 2025 | 05 | 01 | 05 | 00 | 00+00 | 00 |
In [82]:
# Find out how many occurrences of 'temperature_2m' rounded to 19C (0 decimal number)
dfmod0a = df1[(df1['temperature_2m'] >= 18.5) & (df1['temperature_2m'] < 19.5)]
dfmod0a = dfmod0a.dropna()
print(dfmod0a['temperature_2m'])
# The result: temperature of 19C occurred 127 times out of 1651 data samples.
# This is far more occurences than if we use the 17.4525C (4 decimal numbers)
480 18.959500
481 19.259500
482 19.359500
483 19.259500
484 18.559500
...
1835 19.019500
1887 19.069500
1907 19.069500
1931 19.219501
1932 19.019500
Name: temperature_2m, Length: 82, dtype: float64
In [83]:
# Find Mode value of Precipitation Probability (Table) precise to 1 decimal number
dfmod1 = df1.round(1).mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod1 = dfmod1.dropna()
dfmod1
Out[83]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 437 | 2025-03-19 05:00:00+00:00 | 23.4 | 9.0 | -2.3 | 20.5 | 0.0 | 0.0 | 0.0 | 0.0 | 1007.1 | 939.7 | 0.0 | 0.0 | 0.0 | 0.0 | 90000.0 | 7.6 | 0.0 | 0.1 | 0.1 | 0.1 | 0.1 | 0.0 | 0.0 | 0.0 | 0.1 | 1.5 | 7.6 | 6.3 | 1.8 | 135.0 | 180.0 | 180.0 | 135.0 | 22.3 | 17.2 | 20.9 | 20.4 | 22.6 | 23.4 | 25.1 | 10.4 | Las Vegas | 2025-03-20 | 05:00:00+00:00 | 2025 | 05 | 01 | 05 | 00 | 00+00 | 00 |
In [84]:
# Find out how many occurrences of 'temperature_2m' rounded to 19.7C (1 decimal number)
dfmod1a = df1[(df1['temperature_2m'] >= 19.65) & (df1['temperature_2m'] < 19.75)] #.where(df1['temperature_2m'] == 19)
dfmod1a = dfmod1a.dropna()
print(dfmod1a['temperature_2m'])
len(dfmod1a)
# The result: the temperature of 19.7C occurred only 18 times out of 1651 data samples.
564 19.659500 639 19.719501 657 19.669500 919 19.719501 920 19.719501 1285 19.719501 1390 19.669500 1555 19.669500 1560 19.719501 1834 19.719501 Name: temperature_2m, dtype: float64
Out[84]:
10
In [85]:
# Find Mode value of Precipitation Probability (Table) precise to 2 decimal numbers
dfmod2 = df1.round(2).mode().sort_values(by = ['precipitation_probability','cloud_cover_mid','soil_moisture_0_to_1cm','soil_moisture_1_to_3cm','soil_moisture_3_to_9cm','cloud_cover_low',
'relative_humidity_2m','cloud_cover_high','dew_point_2m',
'vapour_pressure_deficit'], ascending=[False,False,False,False,False,False,False,False,False,False])
dfmod2 = dfmod2.dropna()
dfmod2
Out[85]:
| Unnamed: 0 | date | temperature_2m | relative_humidity_2m | dew_point_2m | apparent_temperature | precipitation_probability | precipitation | rain | showers | pressure_msl | surface_pressure | cloud_cover | cloud_cover_low | cloud_cover_mid | cloud_cover_high | visibility | wind_speed_10m | soil_moisture_0_to_1cm | soil_moisture_1_to_3cm | soil_moisture_3_to_9cm | soil_moisture_9_to_27cm | soil_moisture_27_to_81cm | evapotranspiration | snowfall | snow_depth | et0_fao_evapotranspiration | vapour_pressure_deficit | wind_speed_80m | wind_speed_120m | wind_speed_180m | wind_direction_10m | wind_direction_80m | wind_direction_120m | wind_direction_180m | temperature_80m | temperature_120m | temperature_180m | soil_temperature_0cm | soil_temperature_6cm | soil_temperature_18cm | soil_temperature_54cm | wind_gusts_10m | Location | ymd | hmstz | year | month | day | hour | minute | sectz | tzsec | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 437 | 2025-03-19 05:00:00+00:00 | 20.82 | 9.0 | -1.7 | 14.61 | 0.0 | 0.0 | 0.0 | 0.0 | 1007.1 | 941.44 | 0.0 | 0.0 | 0.0 | 0.0 | 90000.0 | 7.42 | 0.03 | 0.05 | 0.09 | 0.1 | 0.1 | 0.0 | 0.0 | 0.0 | 0.12 | 1.93 | 3.32 | 7.39 | 4.02 | 135.0 | 180.0 | 180.0 | 135.0 | 26.01 | 26.41 | 20.86 | 20.36 | 20.81 | 23.41 | 25.11 | 10.44 | Las Vegas | 2025-03-20 | 05:00:00+00:00 | 2025 | 05 | 01 | 05 | 00 | 00+00 | 00 |
In [86]:
# Find out how many occurrences of 'temperature_2m' rounded to 17.45C (2 decimal numbers)
dfmod2a = df1[(df1['temperature_2m'] >= 17.445) & (df1['temperature_2m'] < 17.455)] #.where(df1['temperature_2m'] == 19)
dfmod2a = dfmod2a.dropna()
print(dfmod2a['temperature_2m'])
len(dfmod2a)
# The result: the temperature of 17.45C occurred only 11 times out of 1651 data samples. This is perfectly the same as if we don't round it.
Series([], Name: temperature_2m, dtype: float64)
Out[86]:
0
In [87]:
# CONCLUSION: to get the nearest possible Mode value of a Column (in this sample, we take 'temperature_2m'), we can't just take the real values
# but, instead, maybe we have to round it to the nearest integer/whole number.
Standard Deviation and Outliers of Precipitation Probability¶
In [88]:
# Function Definition
def find_outliers_iqr(series):
Q1 = series.quantile(0.25)
print('Q1 = ', Q1)
Q2 = series.quantile(0.50)
print('Q2 = ', Q2)
Q3 = series.quantile(0.75)
print('Q3 = ', Q3)
IQR = Q3 - Q1
lower_bound = Q1 - 1.5 * IQR
print('Lower Bound = ', lower_bound)
upper_bound = Q3 + 1.5 * IQR
print('Upper Bound = ', upper_bound)
outliers = series[(series < lower_bound) | (series > upper_bound)]
return outliers
In [89]:
##### Standard Deviation of 'precipitation_probability'
df1stdevpp = df1[['precipitation_probability']].std() #.sort_values(ascending=False)
df1stdevpp
Out[89]:
precipitation_probability 5.284249 dtype: float64
In [90]:
df1[['precipitation_probability']].mean()
Out[90]:
precipitation_probability 1.236769 dtype: float64
In [91]:
# Distribution Plot of 'precipitation_probability'
df1[['precipitation_probability']].plot.kde()
Out[91]:
<Axes: ylabel='Density'>
In [92]:
##### Outliers of 'precipitation_probability'
# Quantiles of 'precipitation_probability'
# Note: either Lower Bound and/or Upper Bound doesn't have to be exist at all in the Dataset.
find_outliers_iqr(df1[['precipitation_probability']]).dropna().sort_values(by='precipitation_probability', ascending=False)
Q1 = precipitation_probability 0.0 Name: 0.25, dtype: float64 Q2 = precipitation_probability 0.0 Name: 0.5, dtype: float64 Q3 = precipitation_probability 0.0 Name: 0.75, dtype: float64 Lower Bound = precipitation_probability 0.0 dtype: float64 Upper Bound = precipitation_probability 0.0 dtype: float64
Out[92]:
| precipitation_probability | |
|---|---|
| 1583 | 70.0 |
| 1590 | 54.0 |
| 1589 | 53.0 |
| 1582 | 51.0 |
| 1552 | 47.0 |
| ... | ... |
| 1176 | 1.0 |
| 1175 | 1.0 |
| 1174 | 1.0 |
| 1173 | 1.0 |
| 524 | 1.0 |
342 rows × 1 columns
In [93]:
# Box Plot of 'precipitation_probability'
# Note:
# Top black horizontal line is Upper Bound value
# Bottom black horizontal line is Lower Bound value
# Lower blue line is 25 percentile
# Green line is Median a.k.a 50 percentile
# Upper blue line is 75 percentile
df1[['precipitation_probability']].boxplot()
Out[93]:
<Axes: >
In [94]:
# Draw the Scatter Plot of 'precipitation_probability'
# We can see that the dots are concentrated at below/bottom (below the Upper Bound of 17.5)
import matplotlib.pyplot as plt
df1[['date','precipitation_probability']].plot.scatter(x='date', y='precipitation_probability', s=1, c='green')
Out[94]:
<Axes: xlabel='date', ylabel='precipitation_probability'>
In [95]:
##### Standard Deviation of 'temperature_2m'
df1stdevtemp = df1[['temperature_2m']].std() #.sort_values(ascending=False)
df1stdevtemp
Out[95]:
temperature_2m 6.807911 dtype: float64
In [96]:
df1[['temperature_2m']].mean()
Out[96]:
temperature_2m 23.069952 dtype: float64
In [97]:
df1[['temperature_2m']].max()
Out[97]:
temperature_2m 41.3195 dtype: float64
In [98]:
df1[['temperature_2m']].plot.kde()
Out[98]:
<Axes: ylabel='Density'>
In [99]:
##### Outliers of 'temperature_2m'
# Quantiles of 'temperature_2m'
# Note: either Lower Bound and/or Upper Bound doesn't have to be exist at all in the Dataset.
outlierstemp = find_outliers_iqr(df1[['temperature_2m']])
print(outlierstemp.dropna().sort_values(by='temperature_2m', ascending=False))
Q1 = temperature_2m 17.7695 Name: 0.25, dtype: float64 Q2 = temperature_2m 22.7695 Name: 0.5, dtype: float64 Q3 = temperature_2m 27.844501 Name: 0.75, dtype: float64 Lower Bound = temperature_2m 2.656999 dtype: float64 Upper Bound = temperature_2m 42.957001 dtype: float64 Empty DataFrame Columns: [temperature_2m] Index: []
In [100]:
# Box Plot of 'temperature_2m'
# Note:
# Top black horizontal line is Upper Bound value
# Bottom black horizontal line is Lower Bound value
# Lower blue line is 25 percentile
# Green line is Median a.k.a 50 percentile
# Upper blue line is 75 percentile
df1[['temperature_2m']].boxplot()
Out[100]:
<Axes: >
In [101]:
# Draw the Scatter Plot of 'temperature_2m'
# We can see that the dots are concentrated at below/bottom (below the Upper Bound of 17.5)
import matplotlib.pyplot as plt
df1[['date','temperature_2m']].plot.scatter(x='date', y='temperature_2m', s=1, c='green')
Out[101]:
<Axes: xlabel='date', ylabel='temperature_2m'>
In [ ]:
In [ ]:
In [102]:
# Boxplot of all Columns
df1[df1.columns[1:9]].boxplot()
Out[102]:
<Axes: >
In [103]:
df1[df1.columns[9:17]].boxplot()
Out[103]:
<Axes: >
In [104]:
df1[df1.columns[17:26]].boxplot()
Out[104]:
<Axes: >
In [105]:
df1[df1.columns[26:35]].boxplot()
Out[105]:
<Axes: >
In [106]:
df1[df1.columns[35:]].boxplot()
Out[106]:
<Axes: >
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: